You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
427 lines
16 KiB
427 lines
16 KiB
5 months ago
|
"""
|
||
|
Joint Random Variables Module
|
||
|
|
||
|
See Also
|
||
|
========
|
||
|
sympy.stats.rv
|
||
|
sympy.stats.frv
|
||
|
sympy.stats.crv
|
||
|
sympy.stats.drv
|
||
|
"""
|
||
|
from math import prod
|
||
|
|
||
|
from sympy.core.basic import Basic
|
||
|
from sympy.core.function import Lambda
|
||
|
from sympy.core.singleton import S
|
||
|
from sympy.core.symbol import (Dummy, Symbol)
|
||
|
from sympy.core.sympify import sympify
|
||
|
from sympy.sets.sets import ProductSet
|
||
|
from sympy.tensor.indexed import Indexed
|
||
|
from sympy.concrete.products import Product
|
||
|
from sympy.concrete.summations import Sum, summation
|
||
|
from sympy.core.containers import Tuple
|
||
|
from sympy.integrals.integrals import Integral, integrate
|
||
|
from sympy.matrices import ImmutableMatrix, matrix2numpy, list2numpy
|
||
|
from sympy.stats.crv import SingleContinuousDistribution, SingleContinuousPSpace
|
||
|
from sympy.stats.drv import SingleDiscreteDistribution, SingleDiscretePSpace
|
||
|
from sympy.stats.rv import (ProductPSpace, NamedArgsMixin, Distribution,
|
||
|
ProductDomain, RandomSymbol, random_symbols,
|
||
|
SingleDomain, _symbol_converter)
|
||
|
from sympy.utilities.iterables import iterable
|
||
|
from sympy.utilities.misc import filldedent
|
||
|
from sympy.external import import_module
|
||
|
|
||
|
# __all__ = ['marginal_distribution']
|
||
|
|
||
|
class JointPSpace(ProductPSpace):
|
||
|
"""
|
||
|
Represents a joint probability space. Represented using symbols for
|
||
|
each component and a distribution.
|
||
|
"""
|
||
|
def __new__(cls, sym, dist):
|
||
|
if isinstance(dist, SingleContinuousDistribution):
|
||
|
return SingleContinuousPSpace(sym, dist)
|
||
|
if isinstance(dist, SingleDiscreteDistribution):
|
||
|
return SingleDiscretePSpace(sym, dist)
|
||
|
sym = _symbol_converter(sym)
|
||
|
return Basic.__new__(cls, sym, dist)
|
||
|
|
||
|
@property
|
||
|
def set(self):
|
||
|
return self.domain.set
|
||
|
|
||
|
@property
|
||
|
def symbol(self):
|
||
|
return self.args[0]
|
||
|
|
||
|
@property
|
||
|
def distribution(self):
|
||
|
return self.args[1]
|
||
|
|
||
|
@property
|
||
|
def value(self):
|
||
|
return JointRandomSymbol(self.symbol, self)
|
||
|
|
||
|
@property
|
||
|
def component_count(self):
|
||
|
_set = self.distribution.set
|
||
|
if isinstance(_set, ProductSet):
|
||
|
return S(len(_set.args))
|
||
|
elif isinstance(_set, Product):
|
||
|
return _set.limits[0][-1]
|
||
|
return S.One
|
||
|
|
||
|
@property
|
||
|
def pdf(self):
|
||
|
sym = [Indexed(self.symbol, i) for i in range(self.component_count)]
|
||
|
return self.distribution(*sym)
|
||
|
|
||
|
@property
|
||
|
def domain(self):
|
||
|
rvs = random_symbols(self.distribution)
|
||
|
if not rvs:
|
||
|
return SingleDomain(self.symbol, self.distribution.set)
|
||
|
return ProductDomain(*[rv.pspace.domain for rv in rvs])
|
||
|
|
||
|
def component_domain(self, index):
|
||
|
return self.set.args[index]
|
||
|
|
||
|
def marginal_distribution(self, *indices):
|
||
|
count = self.component_count
|
||
|
if count.atoms(Symbol):
|
||
|
raise ValueError("Marginal distributions cannot be computed "
|
||
|
"for symbolic dimensions. It is a work under progress.")
|
||
|
orig = [Indexed(self.symbol, i) for i in range(count)]
|
||
|
all_syms = [Symbol(str(i)) for i in orig]
|
||
|
replace_dict = dict(zip(all_syms, orig))
|
||
|
sym = tuple(Symbol(str(Indexed(self.symbol, i))) for i in indices)
|
||
|
limits = [[i,] for i in all_syms if i not in sym]
|
||
|
index = 0
|
||
|
for i in range(count):
|
||
|
if i not in indices:
|
||
|
limits[index].append(self.distribution.set.args[i])
|
||
|
limits[index] = tuple(limits[index])
|
||
|
index += 1
|
||
|
if self.distribution.is_Continuous:
|
||
|
f = Lambda(sym, integrate(self.distribution(*all_syms), *limits))
|
||
|
elif self.distribution.is_Discrete:
|
||
|
f = Lambda(sym, summation(self.distribution(*all_syms), *limits))
|
||
|
return f.xreplace(replace_dict)
|
||
|
|
||
|
def compute_expectation(self, expr, rvs=None, evaluate=False, **kwargs):
|
||
|
syms = tuple(self.value[i] for i in range(self.component_count))
|
||
|
rvs = rvs or syms
|
||
|
if not any(i in rvs for i in syms):
|
||
|
return expr
|
||
|
expr = expr*self.pdf
|
||
|
for rv in rvs:
|
||
|
if isinstance(rv, Indexed):
|
||
|
expr = expr.xreplace({rv: Indexed(str(rv.base), rv.args[1])})
|
||
|
elif isinstance(rv, RandomSymbol):
|
||
|
expr = expr.xreplace({rv: rv.symbol})
|
||
|
if self.value in random_symbols(expr):
|
||
|
raise NotImplementedError(filldedent('''
|
||
|
Expectations of expression with unindexed joint random symbols
|
||
|
cannot be calculated yet.'''))
|
||
|
limits = tuple((Indexed(str(rv.base),rv.args[1]),
|
||
|
self.distribution.set.args[rv.args[1]]) for rv in syms)
|
||
|
return Integral(expr, *limits)
|
||
|
|
||
|
def where(self, condition):
|
||
|
raise NotImplementedError()
|
||
|
|
||
|
def compute_density(self, expr):
|
||
|
raise NotImplementedError()
|
||
|
|
||
|
def sample(self, size=(), library='scipy', seed=None):
|
||
|
"""
|
||
|
Internal sample method
|
||
|
|
||
|
Returns dictionary mapping RandomSymbol to realization value.
|
||
|
"""
|
||
|
return {RandomSymbol(self.symbol, self): self.distribution.sample(size,
|
||
|
library=library, seed=seed)}
|
||
|
|
||
|
def probability(self, condition):
|
||
|
raise NotImplementedError()
|
||
|
|
||
|
|
||
|
class SampleJointScipy:
|
||
|
"""Returns the sample from scipy of the given distribution"""
|
||
|
def __new__(cls, dist, size, seed=None):
|
||
|
return cls._sample_scipy(dist, size, seed)
|
||
|
|
||
|
@classmethod
|
||
|
def _sample_scipy(cls, dist, size, seed):
|
||
|
"""Sample from SciPy."""
|
||
|
|
||
|
import numpy
|
||
|
if seed is None or isinstance(seed, int):
|
||
|
rand_state = numpy.random.default_rng(seed=seed)
|
||
|
else:
|
||
|
rand_state = seed
|
||
|
from scipy import stats as scipy_stats
|
||
|
scipy_rv_map = {
|
||
|
'MultivariateNormalDistribution': lambda dist, size: scipy_stats.multivariate_normal.rvs(
|
||
|
mean=matrix2numpy(dist.mu).flatten(),
|
||
|
cov=matrix2numpy(dist.sigma), size=size, random_state=rand_state),
|
||
|
'MultivariateBetaDistribution': lambda dist, size: scipy_stats.dirichlet.rvs(
|
||
|
alpha=list2numpy(dist.alpha, float).flatten(), size=size, random_state=rand_state),
|
||
|
'MultinomialDistribution': lambda dist, size: scipy_stats.multinomial.rvs(
|
||
|
n=int(dist.n), p=list2numpy(dist.p, float).flatten(), size=size, random_state=rand_state)
|
||
|
}
|
||
|
|
||
|
sample_shape = {
|
||
|
'MultivariateNormalDistribution': lambda dist: matrix2numpy(dist.mu).flatten().shape,
|
||
|
'MultivariateBetaDistribution': lambda dist: list2numpy(dist.alpha).flatten().shape,
|
||
|
'MultinomialDistribution': lambda dist: list2numpy(dist.p).flatten().shape
|
||
|
}
|
||
|
|
||
|
dist_list = scipy_rv_map.keys()
|
||
|
|
||
|
if dist.__class__.__name__ not in dist_list:
|
||
|
return None
|
||
|
|
||
|
samples = scipy_rv_map[dist.__class__.__name__](dist, size)
|
||
|
return samples.reshape(size + sample_shape[dist.__class__.__name__](dist))
|
||
|
|
||
|
class SampleJointNumpy:
|
||
|
"""Returns the sample from numpy of the given distribution"""
|
||
|
|
||
|
def __new__(cls, dist, size, seed=None):
|
||
|
return cls._sample_numpy(dist, size, seed)
|
||
|
|
||
|
@classmethod
|
||
|
def _sample_numpy(cls, dist, size, seed):
|
||
|
"""Sample from NumPy."""
|
||
|
|
||
|
import numpy
|
||
|
if seed is None or isinstance(seed, int):
|
||
|
rand_state = numpy.random.default_rng(seed=seed)
|
||
|
else:
|
||
|
rand_state = seed
|
||
|
numpy_rv_map = {
|
||
|
'MultivariateNormalDistribution': lambda dist, size: rand_state.multivariate_normal(
|
||
|
mean=matrix2numpy(dist.mu, float).flatten(),
|
||
|
cov=matrix2numpy(dist.sigma, float), size=size),
|
||
|
'MultivariateBetaDistribution': lambda dist, size: rand_state.dirichlet(
|
||
|
alpha=list2numpy(dist.alpha, float).flatten(), size=size),
|
||
|
'MultinomialDistribution': lambda dist, size: rand_state.multinomial(
|
||
|
n=int(dist.n), pvals=list2numpy(dist.p, float).flatten(), size=size)
|
||
|
}
|
||
|
|
||
|
sample_shape = {
|
||
|
'MultivariateNormalDistribution': lambda dist: matrix2numpy(dist.mu).flatten().shape,
|
||
|
'MultivariateBetaDistribution': lambda dist: list2numpy(dist.alpha).flatten().shape,
|
||
|
'MultinomialDistribution': lambda dist: list2numpy(dist.p).flatten().shape
|
||
|
}
|
||
|
|
||
|
dist_list = numpy_rv_map.keys()
|
||
|
|
||
|
if dist.__class__.__name__ not in dist_list:
|
||
|
return None
|
||
|
|
||
|
samples = numpy_rv_map[dist.__class__.__name__](dist, prod(size))
|
||
|
return samples.reshape(size + sample_shape[dist.__class__.__name__](dist))
|
||
|
|
||
|
class SampleJointPymc:
|
||
|
"""Returns the sample from pymc of the given distribution"""
|
||
|
|
||
|
def __new__(cls, dist, size, seed=None):
|
||
|
return cls._sample_pymc(dist, size, seed)
|
||
|
|
||
|
@classmethod
|
||
|
def _sample_pymc(cls, dist, size, seed):
|
||
|
"""Sample from PyMC."""
|
||
|
|
||
|
try:
|
||
|
import pymc
|
||
|
except ImportError:
|
||
|
import pymc3 as pymc
|
||
|
pymc_rv_map = {
|
||
|
'MultivariateNormalDistribution': lambda dist:
|
||
|
pymc.MvNormal('X', mu=matrix2numpy(dist.mu, float).flatten(),
|
||
|
cov=matrix2numpy(dist.sigma, float), shape=(1, dist.mu.shape[0])),
|
||
|
'MultivariateBetaDistribution': lambda dist:
|
||
|
pymc.Dirichlet('X', a=list2numpy(dist.alpha, float).flatten()),
|
||
|
'MultinomialDistribution': lambda dist:
|
||
|
pymc.Multinomial('X', n=int(dist.n),
|
||
|
p=list2numpy(dist.p, float).flatten(), shape=(1, len(dist.p)))
|
||
|
}
|
||
|
|
||
|
sample_shape = {
|
||
|
'MultivariateNormalDistribution': lambda dist: matrix2numpy(dist.mu).flatten().shape,
|
||
|
'MultivariateBetaDistribution': lambda dist: list2numpy(dist.alpha).flatten().shape,
|
||
|
'MultinomialDistribution': lambda dist: list2numpy(dist.p).flatten().shape
|
||
|
}
|
||
|
|
||
|
dist_list = pymc_rv_map.keys()
|
||
|
|
||
|
if dist.__class__.__name__ not in dist_list:
|
||
|
return None
|
||
|
|
||
|
import logging
|
||
|
logging.getLogger("pymc3").setLevel(logging.ERROR)
|
||
|
with pymc.Model():
|
||
|
pymc_rv_map[dist.__class__.__name__](dist)
|
||
|
samples = pymc.sample(draws=prod(size), chains=1, progressbar=False, random_seed=seed, return_inferencedata=False, compute_convergence_checks=False)[:]['X']
|
||
|
return samples.reshape(size + sample_shape[dist.__class__.__name__](dist))
|
||
|
|
||
|
|
||
|
_get_sample_class_jrv = {
|
||
|
'scipy': SampleJointScipy,
|
||
|
'pymc3': SampleJointPymc,
|
||
|
'pymc': SampleJointPymc,
|
||
|
'numpy': SampleJointNumpy
|
||
|
}
|
||
|
|
||
|
class JointDistribution(Distribution, NamedArgsMixin):
|
||
|
"""
|
||
|
Represented by the random variables part of the joint distribution.
|
||
|
Contains methods for PDF, CDF, sampling, marginal densities, etc.
|
||
|
"""
|
||
|
|
||
|
_argnames = ('pdf', )
|
||
|
|
||
|
def __new__(cls, *args):
|
||
|
args = list(map(sympify, args))
|
||
|
for i in range(len(args)):
|
||
|
if isinstance(args[i], list):
|
||
|
args[i] = ImmutableMatrix(args[i])
|
||
|
return Basic.__new__(cls, *args)
|
||
|
|
||
|
@property
|
||
|
def domain(self):
|
||
|
return ProductDomain(self.symbols)
|
||
|
|
||
|
@property
|
||
|
def pdf(self):
|
||
|
return self.density.args[1]
|
||
|
|
||
|
def cdf(self, other):
|
||
|
if not isinstance(other, dict):
|
||
|
raise ValueError("%s should be of type dict, got %s"%(other, type(other)))
|
||
|
rvs = other.keys()
|
||
|
_set = self.domain.set.sets
|
||
|
expr = self.pdf(tuple(i.args[0] for i in self.symbols))
|
||
|
for i in range(len(other)):
|
||
|
if rvs[i].is_Continuous:
|
||
|
density = Integral(expr, (rvs[i], _set[i].inf,
|
||
|
other[rvs[i]]))
|
||
|
elif rvs[i].is_Discrete:
|
||
|
density = Sum(expr, (rvs[i], _set[i].inf,
|
||
|
other[rvs[i]]))
|
||
|
return density
|
||
|
|
||
|
def sample(self, size=(), library='scipy', seed=None):
|
||
|
""" A random realization from the distribution """
|
||
|
|
||
|
libraries = ('scipy', 'numpy', 'pymc3', 'pymc')
|
||
|
if library not in libraries:
|
||
|
raise NotImplementedError("Sampling from %s is not supported yet."
|
||
|
% str(library))
|
||
|
if not import_module(library):
|
||
|
raise ValueError("Failed to import %s" % library)
|
||
|
|
||
|
samps = _get_sample_class_jrv[library](self, size, seed=seed)
|
||
|
|
||
|
if samps is not None:
|
||
|
return samps
|
||
|
raise NotImplementedError(
|
||
|
"Sampling for %s is not currently implemented from %s"
|
||
|
% (self.__class__.__name__, library)
|
||
|
)
|
||
|
|
||
|
def __call__(self, *args):
|
||
|
return self.pdf(*args)
|
||
|
|
||
|
class JointRandomSymbol(RandomSymbol):
|
||
|
"""
|
||
|
Representation of random symbols with joint probability distributions
|
||
|
to allow indexing."
|
||
|
"""
|
||
|
def __getitem__(self, key):
|
||
|
if isinstance(self.pspace, JointPSpace):
|
||
|
if (self.pspace.component_count <= key) == True:
|
||
|
raise ValueError("Index keys for %s can only up to %s." %
|
||
|
(self.name, self.pspace.component_count - 1))
|
||
|
return Indexed(self, key)
|
||
|
|
||
|
|
||
|
|
||
|
class MarginalDistribution(Distribution):
|
||
|
"""
|
||
|
Represents the marginal distribution of a joint probability space.
|
||
|
|
||
|
Initialised using a probability distribution and random variables(or
|
||
|
their indexed components) which should be a part of the resultant
|
||
|
distribution.
|
||
|
"""
|
||
|
|
||
|
def __new__(cls, dist, *rvs):
|
||
|
if len(rvs) == 1 and iterable(rvs[0]):
|
||
|
rvs = tuple(rvs[0])
|
||
|
if not all(isinstance(rv, (Indexed, RandomSymbol)) for rv in rvs):
|
||
|
raise ValueError(filldedent('''Marginal distribution can be
|
||
|
intitialised only in terms of random variables or indexed random
|
||
|
variables'''))
|
||
|
rvs = Tuple.fromiter(rv for rv in rvs)
|
||
|
if not isinstance(dist, JointDistribution) and len(random_symbols(dist)) == 0:
|
||
|
return dist
|
||
|
return Basic.__new__(cls, dist, rvs)
|
||
|
|
||
|
def check(self):
|
||
|
pass
|
||
|
|
||
|
@property
|
||
|
def set(self):
|
||
|
rvs = [i for i in self.args[1] if isinstance(i, RandomSymbol)]
|
||
|
return ProductSet(*[rv.pspace.set for rv in rvs])
|
||
|
|
||
|
@property
|
||
|
def symbols(self):
|
||
|
rvs = self.args[1]
|
||
|
return {rv.pspace.symbol for rv in rvs}
|
||
|
|
||
|
def pdf(self, *x):
|
||
|
expr, rvs = self.args[0], self.args[1]
|
||
|
marginalise_out = [i for i in random_symbols(expr) if i not in rvs]
|
||
|
if isinstance(expr, JointDistribution):
|
||
|
count = len(expr.domain.args)
|
||
|
x = Dummy('x', real=True)
|
||
|
syms = tuple(Indexed(x, i) for i in count)
|
||
|
expr = expr.pdf(syms)
|
||
|
else:
|
||
|
syms = tuple(rv.pspace.symbol if isinstance(rv, RandomSymbol) else rv.args[0] for rv in rvs)
|
||
|
return Lambda(syms, self.compute_pdf(expr, marginalise_out))(*x)
|
||
|
|
||
|
def compute_pdf(self, expr, rvs):
|
||
|
for rv in rvs:
|
||
|
lpdf = 1
|
||
|
if isinstance(rv, RandomSymbol):
|
||
|
lpdf = rv.pspace.pdf
|
||
|
expr = self.marginalise_out(expr*lpdf, rv)
|
||
|
return expr
|
||
|
|
||
|
def marginalise_out(self, expr, rv):
|
||
|
from sympy.concrete.summations import Sum
|
||
|
if isinstance(rv, RandomSymbol):
|
||
|
dom = rv.pspace.set
|
||
|
elif isinstance(rv, Indexed):
|
||
|
dom = rv.base.component_domain(
|
||
|
rv.pspace.component_domain(rv.args[1]))
|
||
|
expr = expr.xreplace({rv: rv.pspace.symbol})
|
||
|
if rv.pspace.is_Continuous:
|
||
|
#TODO: Modify to support integration
|
||
|
#for all kinds of sets.
|
||
|
expr = Integral(expr, (rv.pspace.symbol, dom))
|
||
|
elif rv.pspace.is_Discrete:
|
||
|
#incorporate this into `Sum`/`summation`
|
||
|
if dom in (S.Integers, S.Naturals, S.Naturals0):
|
||
|
dom = (dom.inf, dom.sup)
|
||
|
expr = Sum(expr, (rv.pspace.symbol, dom))
|
||
|
return expr
|
||
|
|
||
|
def __call__(self, *args):
|
||
|
return self.pdf(*args)
|