diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000..7417668 --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,4 @@ +# $ git config blame.ignoreRevsFile .git-blame-ignore-revs + +# Sporadic fixes in test_random.py +e76aa3a5a4b889c0434f0103ec102a50b93ab1ca diff --git a/mkl_random/__init__.py b/mkl_random/__init__.py index 512027b..b15fec7 100644 --- a/mkl_random/__init__.py +++ b/mkl_random/__init__.py @@ -28,7 +28,58 @@ from . import _init_helper -from .mklrand import * +from .mklrand import ( + MKLRandomState, + RandomState, + seed, + get_state, + set_state, + random_sample, + choice, + randint, + bytes, + uniform, + rand, + randn, + random_integers, + standard_normal, + normal, + beta, + exponential, + standard_exponential, + standard_gamma, + gamma, + f, + noncentral_f, + chisquare, + noncentral_chisquare, + standard_cauchy, + standard_t, + vonmises, + pareto, + weibull, + power, + laplace, + gumbel, + logistic, + lognormal, + rayleigh, + wald, + triangular, + binomial, + negative_binomial, + poisson, + zipf, + geometric, + hypergeometric, + logseries, + multivariate_normal, + multinormal_cholesky, + multinomial, + dirichlet, + shuffle, + permutation, +) from ._version import __version__ try: @@ -42,4 +93,60 @@ test = PytestTester(__name__) del PytestTester +import mkl_random.interfaces + +__all__ = [ + "MKLRandomState", + "RandomState", + "seed", + "get_state", + "set_state", + "random_sample", + "choice", + "randint", + "bytes", + "uniform", + "rand", + "randn", + "random_integers", + "standard_normal", + "normal", + "beta", + "exponential", + "standard_exponential", + "standard_gamma", + "gamma", + "f", + "noncentral_f", + "chisquare", + "noncentral_chisquare", + "standard_cauchy", + "standard_t", + "vonmises", + "pareto", + "weibull", + "power", + "laplace", + "gumbel", + "logistic", + "lognormal", + "rayleigh", + "wald", + "triangular", + "binomial", + "negative_binomial", + "poisson", + "zipf", + "geometric", + "hypergeometric", + "logseries", + "multivariate_normal", + "multinormal_cholesky", + "multinomial", + "dirichlet", + "shuffle", + "permutation", + "interfaces", +] + del _init_helper diff --git a/mkl_random/interfaces/README.md b/mkl_random/interfaces/README.md new file mode 100644 index 0000000..237dac5 --- /dev/null +++ b/mkl_random/interfaces/README.md @@ -0,0 +1,18 @@ +# Interfaces +The `mkl_random` package provides interfaces that serve as drop-in replacements for equivalent functions in NumPy. + +--- + +## NumPy interface - `mkl_random.interfaces.numpy_random` + +This interface is a drop-in replacement for the legacy portion of the [`numpy.random`](https://numpy.org/devdocs/reference/random/legacy.html) module and includes **all** classes and functions available there: + +* random generator: `RandomState`. + +* seeding and state functions: `get_state`, `set_state`, and `seed`. + +* simple random data: `rand`, `randn`, `randint`, `random_integers`, `random_sample`, `choice` and `bytes`. + +* permutations: `shuffle` and `permutation` + +* distributions: `beta`, `binomial`, `chisquare`, `dirichlet`, `exponential`, `f`, `gamma`, `geometric`, `gumbel`, `hypergeometric`, `laplace`, `logistic`, `lognormal`, `multinomial`, `multivariate_normal`, `negative_binomial`, `noncentral_chisquare`, `noncentral_f`, `normal`, `pareto`, `poisson`, `power`, `rayleigh`, `standard_cauchy`, `standard_exponential`, `standard_gamma`, `standard_normal`, `standard_t`, `triangular`, `uniform`, `vonmises`, `wald`, `weibull`, and `zipf`. diff --git a/mkl_random/interfaces/__init__.py b/mkl_random/interfaces/__init__.py new file mode 100644 index 0000000..161d4dd --- /dev/null +++ b/mkl_random/interfaces/__init__.py @@ -0,0 +1,27 @@ +#!/usr/bin/env python +# Copyright (c) 2017, Intel Corporation +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of Intel Corporation nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from . import numpy_random diff --git a/mkl_random/interfaces/_numpy_random.py b/mkl_random/interfaces/_numpy_random.py new file mode 100644 index 0000000..0434da1 --- /dev/null +++ b/mkl_random/interfaces/_numpy_random.py @@ -0,0 +1,646 @@ +#!/usr/bin/env python +# Copyright (c) 2017, Intel Corporation +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of Intel Corporation nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +""" +An interface for the legacy RandomState interface of the NumPy random module +(`numpy.random`) that uses OneMKL RNG in the backend. +""" + +import mkl_random + + +class RandomState(mkl_random.mklrand._MKLRandomState): + """ + RandomState(seed=None) + + Container for the Intel(R) MKL-powered Mersenne Twister pseudo-random + number generator. + + For full documentation refer to `numpy.random.RandomState`. + + *Compatibility Notice* + While this class shares some similarities with the original `RandomState`, + it has been rewritten to use MKL's vector statistics functionality, that + provides efficient implementation of the MT19937. + As consequences: + this version is NOT seed-compatible with the original `RandomState`. + the result of `get_state` is NOT compatible with the original + `RandomState` + + References + ----- + MKL Documentation: https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html + + """ + def __init__(self, seed=None): + super().__init__(seed=seed, brng="MT19937") + + def seed(self, seed=None): + """ + seed(seed=Nonee) + + Seed the generator. + + For full documentation refer to `numpy.random.seed`. + + """ + return super().seed(seed=seed, brng="MT19937") + + def get_state(self, legacy=True): + """ + get_state(legacy) + + Get the internal state of the generator. + + For full documentation refer to `numpy.random.get_state`. + + *Compatibility Notice* + As this class uses MKL in the backend, the state format is NOT + compatible with the original `numpy.random.set_state`. The returned + state represents the MKL stream state as a bytes object, which CANNOT + be interpreted by NumPy's `RandomState`. + + The `legacy` argument is included for compatibility with the original + `RandomState`. + """ + return super().get_state(legacy=legacy) + + def set_state(self, state): + """ + set_state(state) + + Set the internal state of the generator. + + For full documentation refer to `numpy.random.set_state`. + + *Compatibility Notice* + As this class uses MKL in the backend, the state of the generator is + NOT deterministic with states returned from the original + `numpy.random.get_state`. + + """ + return super().set_state(state=state, brng="MT19937") + + def random_sample(self, size=None): + """ + random_sample(size=None) + + Return random floats in the half-open interval [0.0, 1.0). + + For full documentation refer to `numpy.random.random_sample`. + + """ + return super().random_sample(size=size) + + def randint(self, low, high=None, size=None, dtype=int): + """ + randint(low, high=None, size=None, dtype=int) + + Return random integers from `low` (inclusive) to `high` (exclusive). + + For full documentation refer to `numpy.random.randint`. + + """ + return super().randint(low=low, high=high, size=size, dtype=dtype) + + def bytes(self, length): + """ + bytes(length) + + Return random bytes. + + For full documentation refer to `numpy.random.bytes`. + + """ + return super().bytes(length=length) + + def choice(self, a, size=None, replace=True, p=None): + """ + choice(a, size=None, replace=True, p=None) + + Generates a random sample from a given 1-D array. + + For full documentation refer to `numpy.random.choice`. + + """ + return super().choice(a=a, size=size, replace=replace, p=p) + + def uniform(self, low=0.0, high=1.0, size=None): + """ + uniform(low=0.0, high=1.0, size=None) + + Draw samples from a uniform distribution. + + For full documentation refer to `numpy.random.uniform`. + + """ + return super().uniform(low=low, high=high, size=size) + + def rand(self, *args): + """ + rand(d0, d1, ..., dn) + + Random values in a given shape. + + For full documentation refer to `numpy.random.rand`. + + """ + return super().rand(*args) + + def randn(self, *args): + """ + randn(d0, d1, ..., dn) + + Return a sample (or samples) from the "standard normal" distribution. + + For full documentation refer to `numpy.random.randn`. + + """ + return super().randn(*args) + + def random_integers(self, low, high=None, size=None): + """ + random_integers(low, high=None, size=None) + + Return random integers from `low` (inclusive) to `high` (inclusive). + + For full documentation refer to `numpy.random.random_integers`. + + """ + return super().random_integers(low=low, high=high, size=size) + + def standard_normal(self, size=None): + """ + standard_normal(size=None) + + Draw samples from a standard Normal distribution (mean=0, stdev=1). + + For full documentation refer to `numpy.random.standard_normal`. + + """ + return super().standard_normal(size=size, method="ICDF") + + def normal(self, loc=0.0, scale=1.0, size=None): + """ + normal(loc=0.0, scale=1.0, size=None, method='ICDF') + + Draw random samples from a normal (Gaussian) distribution. + + For full documentation refer to `numpy.random.normal`. + + """ + return super().normal(loc=loc, scale=scale, size=size, method="ICDF") + + def beta(self, a, b, size=None): + """ + beta(a, b, size=None) + + Draw random samples from a Beta distribution. + + For full documentation refer to `numpy.random.beta`. + + """ + return super().beta(a=a, b=b, size=size,) + + def exponential(self, scale=1.0, size=None): + """ + exponential(scale=1.0, size=None) + + Draw samples from an exponential distribution. + + For full documentation refer to `numpy.random.exponential`. + + """ + return super().exponential(scale=scale, size=size) + + def standard_exponential(self, size=None): + """ + standard_exponential(size=None) + + Draw samples from the standard exponential distribution. + + For full documentation refer to `numpy.random.standard_exponential`. + + """ + return super().standard_exponential(size=size) + + def standard_gamma(self, shape, size=None): + """ + standard_gamma(shape, size=None) + + Draw samples from the standard gamma distribution. + + For full documentation refer to `numpy.random.standard_gamma`. + + """ + return super().standard_gamma(shape=shape, size=size) + + def gamma(self, shape, scale=1.0, size=None): + """ + gamma(shape, scale=1.0, size=None) + + Draw samples from a gamma distribution. + + For full documentation refer to `numpy.random.gamma`. + + """ + return super().gamma(shape=shape, scale=scale, size=size) + + def f(self, dfn, dfd, size=None): + """ + f(dfn, dfd, size=None) + + Draw samples from an F distribution. + + For full documentation refer to `numpy.random.f`. + + """ + return super().f(dfn=dfn, dfd=dfd, size=size) + + def noncentral_f(self, dfn, dfd, nonc, size=None): + """ + noncentral_f(dfn, dfd, nonc, size=None) + + Draw samples from a non-central F distribution. + + For full documentation refer to `numpy.random.noncentral_f`. + + """ + return super().noncentral_f(dfn=dfn, dfd=dfd, nonc=nonc, size=size) + + def chisquare(self, df, size=None): + """ + chisquare(df, size=None) + + Draw samples from a chi-square distribution. + + For full documentation refer to `numpy.random.chisquare`. + + """ + return super().chisquare(df=df, size=size) + + def noncentral_chisquare(self, df, nonc, size=None): + """ + noncentral_chisquare(df, nonc, size=None) + + Draw samples from a non-central chi-square distribution. + + For full documentation refer to `numpy.random.noncentral_chisquare`. + + """ + return super().noncentral_chisquare(df=df, nonc=nonc, size=size) + + def standard_cauchy(self, size=None): + """ + standard_cauchy(size=None) + + Draw samples from a standard Cauchy distribution. + + For full documentation refer to `numpy.random.standard_cauchy`. + + """ + return super().standard_cauchy(size=size) + + def standard_t(self, df, size=None): + """ + standard_t(df, size=None) + + Draw samples from a standard Student's t distribution. + + For full documentation refer to `numpy.random.standard_t`. + + """ + return super().standard_t(df=df, size=size) + + def vonmises(self, mu, kappa, size=None): + """ + vonmises(mu, kappa, size=None) + + Draw samples from a von Mises distribution. + + For full documentation refer to `numpy.random.vonmises`. + + """ + return super().vonmises(mu=mu, kappa=kappa, size=size) + + def pareto(self, a, size=None): + """ + pareto(a, size=None) + + Draw samples from a Pareto II or Lomax distribution with a scale parameter of 1. + + For full documentation refer to `numpy.random.pareto`. + + """ + return super().pareto(a=a, size=size) + + def weibull(self, a, size=None): + """ + weibull(a, size=None) + + Draw samples from a Weibull distribution. + + For full documentation refer to `numpy.random.weibull`. + + """ + return super().weibull(a=a, size=size) + + def power(self, a, size=None): + """ + power(a, size=None) + + Draw samples from a power distribution. + + For full documentation refer to `numpy.random.power`. + + """ + return super().power(a=a, size=size) + + def laplace(self, loc=0.0, scale=1.0, size=None): + """ + laplace(loc=0.0, scale=1.0, size=None) + + Draw samples from the Laplace distribution. + + For full documentation refer to `numpy.random.laplace`. + + """ + return super().laplace(loc=loc, scale=scale, size=size) + + def gumbel(self, loc=0.0, scale=1.0, size=None): + """ + gumbel(loc=0.0, scale=1.0, size=None) + + Draw samples from a Gumbel distribution. + + For full documentation refer to `numpy.random.gumbel`. + + """ + return super().gumbel(loc=loc, scale=scale, size=size) + + def logistic(self, loc=0.0, scale=1.0, size=None): + """ + logistic(loc=0.0, scale=1.0, size=None) + + Draw samples from a logistic distribution. + + For full documentation refer to `numpy.random.logistic`. + + """ + return super().logistic(loc=loc, scale=scale, size=size) + + def lognormal(self, mean=0.0, sigma=1.0, size=None): + """ + lognormal(mean=0.0, sigma=1.0, size=None) + + Draw random samples from a log-normal distribution. + + For full documentation refer to `numpy.random.lognormal`. + + """ + return super().lognormal(mean=mean, sigma=sigma, size=size, method="ICDF") + + def rayleigh(self, scale=1.0, size=None): + """ + rayleigh(scale=1.0, size=None) + + Draw samples from a Rayleigh distribution. + + For full documentation refer to `numpy.random.rayleigh`. + + """ + return super().rayleigh(scale=scale, size=size) + + def wald(self, mean, scale, size=None): + """ + wald(mean, scale, size=None) + + Draw samples from a Wald distribution. + + For full documentation refer to `numpy.random.wald`. + + """ + return super().wald(mean=mean, scale=scale, size=size) + + def triangular(self, left, mode, right, size=None): + """ + triangular(left, mode, right, size=None) + + Draw samples from a triangular distribution. + + For full documentation refer to `numpy.random.triangular`. + + """ + return super().triangular(left=left, mode=mode, right=right, size=size) + + def binomial(self, n, p, size=None): + """ + binomial(n, p, size=None) + + Draw samples from a binomial distribution. + + For full documentation refer to `numpy.random.binomial`. + + """ + return super().binomial(n=n, p=p, size=size) + + def negative_binomial(self, n, p, size=None): + """ + negative_binomial(n, p, size=None) + + Draw samples from a negative binomial distribution. + + For full documentation refer to `numpy.random.negative_binomial`. + + """ + return super().negative_binomial(n=n, p=p, size=size) + + def poisson(self, lam=1.0, size=None): + """ + poisson(lam=1.0, size=None) + + Draw random samples from a Poisson distribution. + + For full documentation refer to `numpy.random.poisson`. + + """ + return super().poisson(lam=lam, size=size, method="POISNORM") + + def zipf(self, a, size=None): + """ + zipf(a, size=None) + + Draw samples from a Zipf distribution. + + For full documentation refer to `numpy.random.zipf`. + + """ + return super().zipf(a=a, size=size) + + def geometric(self, p, size=None): + """ + geometric(p, size=None) + + Draw samples from the geometric distribution. + + For full documentation refer to `numpy.random.geometric`. + + """ + return super().geometric(p=p, size=size) + + def hypergeometric(self, ngood, nbad, nsample, size=None): + """ + hypergeometric(ngood, nbad, nsample, size=None) + + Draw samples from a hypergeometric distribution. + + For full documentation refer to `numpy.random.hypergeometric`. + + """ + return super().hypergeometric(ngood=ngood, nbad=nbad, nsample=nsample, size=size) + + def logseries(self, p, size=None): + """ + logseries(p, size=None) + + Draw samples from a logarithmic series distribution. + + For full documentation refer to `numpy.random.logseries`. + + """ + return super().logseries(p=p, size=size) + + def multivariate_normal(self, mean, cov, size=None, check_valid='warn', tol=1e-8): + """ + multivariate_normal(mean, cov, size=None, check_valid='warn', tol=1e-8) + + Draw random samples from a multivariate normal distribution. + + For full documentation refer to `numpy.random.multivariate_normal`. + + """ + return super().multivariate_normal( + mean=mean, cov=cov, size=size, check_valid=check_valid, tol=tol + ) + + def multinomial(self, n, pvals, size=None): + """ + multinomial(n, pvals, size=None) + + Draw samples from a multinomial distribution. + + For full documentation refer to `numpy.random.multinomial`. + + """ + return super().multinomial(n=n, pvals=pvals, size=size) + + def dirichlet(self, alpha, size=None): + """ + dirichlet(alpha, size=None) + + Draw samples from the Dirichlet distribution. + + For full documentation refer to `numpy.random.dirichlet`. + + """ + return super().dirichlet(alpha=alpha, size=size) + + def shuffle(self, x): + """ + shuffle(x) + + Modify a sequence in-place by shuffling its contents. + + For full documentation refer to `numpy.random.shuffle`. + + """ + return super().shuffle(x=x) + + def permutation(self, x): + """ + permutation(x) + + Randomly permute a sequence, or return a permuted range. + + For full documentation refer to `numpy.random.permutation`. + + """ + return super().permutation(x=x) + + +# instantiate a default RandomState object to be used by module-level functions +_rand = RandomState() +# define module-level functions using methods of a default RandomState object +seed = _rand.seed +get_state = _rand.get_state +set_state = _rand.set_state +random_sample = _rand.random_sample +choice = _rand.choice +randint = _rand.randint +bytes = _rand.bytes +uniform = _rand.uniform +rand = _rand.rand +randn = _rand.randn +random_integers = _rand.random_integers +standard_normal = _rand.standard_normal +normal = _rand.normal +beta = _rand.beta +exponential = _rand.exponential +standard_exponential = _rand.standard_exponential +standard_gamma = _rand.standard_gamma +gamma = _rand.gamma +f = _rand.f +noncentral_f = _rand.noncentral_f +chisquare = _rand.chisquare +noncentral_chisquare = _rand.noncentral_chisquare +standard_cauchy = _rand.standard_cauchy +standard_t = _rand.standard_t +vonmises = _rand.vonmises +pareto = _rand.pareto +weibull = _rand.weibull +power = _rand.power +laplace = _rand.laplace +gumbel = _rand.gumbel +logistic = _rand.logistic +lognormal = _rand.lognormal +rayleigh = _rand.rayleigh +wald = _rand.wald +triangular = _rand.triangular + +binomial = _rand.binomial +negative_binomial = _rand.negative_binomial +poisson = _rand.poisson +zipf = _rand.zipf +geometric = _rand.geometric +hypergeometric = _rand.hypergeometric +logseries = _rand.logseries + +multivariate_normal = _rand.multivariate_normal +multinomial = _rand.multinomial +dirichlet = _rand.dirichlet + +shuffle = _rand.shuffle +permutation = _rand.permutation diff --git a/mkl_random/interfaces/numpy_random.py b/mkl_random/interfaces/numpy_random.py new file mode 100644 index 0000000..2c9e780 --- /dev/null +++ b/mkl_random/interfaces/numpy_random.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python +# Copyright (c) 2017, Intel Corporation +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# * Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of Intel Corporation nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from ._numpy_random import ( + RandomState, + seed, + get_state, + set_state, + random_sample, + choice, + randint, + bytes, + uniform, + rand, + randn, + random_integers, + standard_normal, + normal, + beta, + exponential, + standard_exponential, + standard_gamma, + gamma, + f, + noncentral_f, + chisquare, + noncentral_chisquare, + standard_cauchy, + standard_t, + vonmises, + pareto, + weibull, + power, + laplace, + gumbel, + logistic, + lognormal, + rayleigh, + wald, + triangular, + binomial, + negative_binomial, + poisson, + zipf, + geometric, + hypergeometric, + logseries, + multivariate_normal, + multinomial, + dirichlet, + shuffle, + permutation, +) + +__all__ = [ + "RandomState", + "seed", + "get_state", + "set_state", + "random_sample", + "choice", + "randint", + "bytes", + "uniform", + "rand", + "randn", + "random_integers", + "standard_normal", + "normal", + "beta", + "exponential", + "standard_exponential", + "standard_gamma", + "gamma", + "f", + "noncentral_f", + "chisquare", + "noncentral_chisquare", + "standard_cauchy", + "standard_t", + "vonmises", + "pareto", + "weibull", + "power", + "laplace", + "gumbel", + "logistic", + "lognormal", + "rayleigh", + "wald", + "triangular", + "binomial", + "negative_binomial", + "poisson", + "zipf", + "geometric", + "hypergeometric", + "logseries", + "multivariate_normal", + "multinomial", + "dirichlet", + "shuffle", + "permutation", +] diff --git a/mkl_random/mklrand.pyx b/mkl_random/mklrand.pyx index 5cd886e..09fb67f 100644 --- a/mkl_random/mklrand.pyx +++ b/mkl_random/mklrand.pyx @@ -193,7 +193,7 @@ ctypedef void (* irk_discdptr_vec)(irk_state *state, cnp.npy_intp len, int *res, cdef int r = cnp._import_array() -if (r<0): +if (r < 0): raise ImportError("Failed to import NumPy") import numpy as np @@ -1024,66 +1024,16 @@ def _brng_id_to_name(int brng_id): return brng_name -cdef class RandomState: - """ - RandomState(seed=None, brng='MT19937') - - Container for the Intel(R) MKL-powered (pseudo-)random number generators. - - `RandomState` exposes a number of methods for generating random numbers - drawn from a variety of probability distributions. In addition to the - distribution-specific arguments, each method takes a keyword argument - `size` that defaults to ``None``. If `size` is ``None``, then a single - value is generated and returned. If `size` is an integer, then a 1-D - array filled with generated values is returned. If `size` is a tuple, - then an array with that shape is filled and returned. - - *Compatibility Notice* - This version of numpy.random has been rewritten to use MKL's vector - statistics functionality, that provides efficient implementation of - the MT19937 and many other basic psuedo-random number generation - algorithms as well as efficient sampling from other common statistical - distributions. As a consequence this version is NOT seed-compatible with - the original numpy.random. - - Parameters - ---------- - seed : {None, int, array_like}, optional - Random seed initializing the pseudo-random number generator. - Can be an integer, an array (or other sequence) of integers of - any length, or ``None`` (the default). - If `seed` is ``None``, then `RandomState` will try to read data from - ``/dev/urandom`` (or the Windows analogue) if available or seed from - the clock otherwise. - brng : {'MT19937', 'SFMT19937', 'MT2203', 'R250', 'WH', 'MCG31', 'MCG59', - 'MRG32K3A', 'PHILOX4X32X10', 'NONDETERM', 'ARS5'}, optional - basic pseudo-random number generation algorithms, or non-deterministic - hardware-based generator, provided by Intel MKL. The default choice is - 'MT19937' - the Mersenne Twister generator. - - Notes - ----- - The Python stdlib module "random" also contains a Mersenne Twister - pseudo-random number generator with a number of methods that are similar - to the ones available in `RandomState`. `RandomState`, besides being - NumPy-aware, has the advantage that it provides a much larger number - of probability distributions to choose from. - - References - ----- - MKL Documentation: https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html - - """ +cdef class _MKLRandomState: cdef irk_state *internal_state cdef object lock - poisson_lam_max = np.iinfo('l').max - np.sqrt(np.iinfo('l').max)*10 def __init__(self, seed=None, brng='MT19937'): self.internal_state = PyMem_Malloc(sizeof(irk_state)) memset(self.internal_state, 0, sizeof(irk_state)) self.lock = Lock() - self.seed(seed, brng) + self._seed_impl(seed, brng) def __dealloc__(self): if self.internal_state != NULL: @@ -1091,36 +1041,7 @@ cdef class RandomState: PyMem_Free(self.internal_state) self.internal_state = NULL - def seed(self, seed=None, brng=None): - """ - seed(seed=None, brng=None) - - Seed the generator. - - This method is called when `RandomState` is initialized. It can be - called again to re-seed the generator. For details, see `RandomState`. - - Parameters - ---------- - seed : int or array_like, optional - Seed for `RandomState`. - Must be convertible to 32 bit unsigned integers. - brng : {'MT19937', 'SFMT19937', 'MT2203', 'R250', 'WH', 'MCG31', - 'MCG59', 'MRG32K3A', 'PHILOX4X32X10', 'NONDETERM', - 'ARS5', None}, optional - basic pseudo-random number generation algorithms, or non-deterministic - hardware-based generator, provided by Intel MKL. Use `brng==None` to keep - the `brng` specified during construction of this class instance. - - See Also - -------- - RandomState - - References - -------- - MKL Documentation: https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html - - """ + def _seed_impl(self, seed=None, brng=None): cdef irk_error errcode cdef irk_brng_t brng_token = MT19937 cdef unsigned int stream_id @@ -1151,7 +1072,40 @@ cdef class RandomState: irk_seed_mkl_array(self.internal_state, cnp.PyArray_DATA(obj), cnp.PyArray_DIM(obj, 0), brng_token, stream_id) - def get_state(self): + def seed(self, seed=None, brng=None): + """ + seed(seed=None, brng=None) + + Seed the generator. + + This method is called when `MKLRandomState` is initialized. It can be + called again to re-seed the generator. For details, see `MKLRandomState`. + + Parameters + ---------- + seed : int or array_like, optional + Seed for `MKLRandomState`. + Must be convertible to 32 bit unsigned integers. + brng : {'MT19937', 'SFMT19937', 'MT2203', 'R250', 'WH', 'MCG31', + 'MCG59', 'MRG32K3A', 'PHILOX4X32X10', 'NONDETERM', + 'ARS5', None}, optional + basic pseudo-random number generation algorithms, or non-deterministic + hardware-based generator, provided by Intel MKL. Use `brng==None` to keep + the `brng` specified during construction of this class instance. + + See Also + -------- + MKLRandomState + + References + -------- + MKL Documentation: https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html + + """ + self._seed_impl(seed, brng) + + + def get_state(self, legacy=True): """ get_state() @@ -1159,13 +1113,18 @@ cdef class RandomState: For more details, see `set_state`. + Parameters + ---------- + legacy : bool, optional + Flag indicating to return a legacy tuple state. + Returns ------- - out : tuple(str, bytes) + out : {tuple(str, bytes), dict} The returned tuple has the following items: - 1. the string, defaulting to 'MT19937', specifying the - basic psedo-random number generation algorithm. + 1. a string specifying the basic psedo-random number generation + algorithm. 2. a bytes object holding content of Intel MKL's stream for the given BRNG. @@ -1196,7 +1155,15 @@ cdef class RandomState: irk_get_state_mkl(self.internal_state, bytesPtr) brng_name = _brng_id_to_name(brng_id) - return (brng_name, bytestring) + if legacy: + return (brng_name, bytestring) + else: + return { + "bit_generator": brng_name, + "state": { + "mkl_stream": bytestring + } + } def set_state(self, state): """ @@ -1209,11 +1176,11 @@ cdef class RandomState: Parameters ---------- - state : tuple(str, bytes) + state : {tuple(str, bytes), dict} The `state` tuple has the following items: - 1. the string, defaulting to 'MT19937', specifying the - basic psedo-random number generation algorithm. + 1. a string specifying the basic psedo-random number generation + algorithm. 2. a bytes object holding content of Intel MKL's stream for the given BRNG. @@ -1245,20 +1212,29 @@ cdef class RandomState: cdef int brng_id cdef cnp.ndarray obj "arrayObject_obj" - state_len = len(state) - if(state_len != 2): - if (state_len == 3 or state_len == 5): - algo_name, key, pos = state[:3] - if algo_name != 'MT19937': - raise ValueError("The legacy state input algorithm must be 'MT19937'") - try: - obj = cnp.PyArray_ContiguousFromObject(key, cnp.NPY_ULONG, 1, 1) - except TypeError: - # compatibility -- could be an older pickle - obj = cnp.PyArray_ContiguousFromObject(key, cnp.NPY_LONG, 1, 1) - self.seed(obj, brng = algo_name) - return - raise ValueError("The argument to set_state must be a list of 2 elements") + + if isinstance(state, (tuple, list)): + state_len = len(state) + if (state_len != 2): + if (state_len == 3 or state_len == 5): + algo_name, key, pos = state[:3] + if algo_name != 'MT19937': + raise ValueError("The legacy state input algorithm must be 'MT19937'") + try: + obj = cnp.PyArray_ContiguousFromObject(key, cnp.NPY_ULONG, 1, 1) + except TypeError: + # compatibility -- could be an older pickle + obj = cnp.PyArray_ContiguousFromObject(key, cnp.NPY_LONG, 1, 1) + self.seed(obj, brng = algo_name) + return + raise ValueError("The argument to set_state must be a list of 2 elements") + elif isinstance(state, dict): + try: + state = (state["bit_generator"], state["state"]["mkl_stream"]) + except KeyError: + raise ValueError("state dictionary is not valid") + else: + raise TypeError("state must be a dict or a tuple.") algorithm_name = state[0] if algorithm_name not in _brng_dict.keys(): @@ -1289,44 +1265,6 @@ cdef class RandomState: global __RandomState_ctor return (__RandomState_ctor, (), self.get_state()) - def leapfrog(self, int k, int nstreams): - """ - leapfrog(k, nstreams) - - Initializes the current state generator using leap-frog method, - if supported for the basic random pseudo-random number generation - algorithm. - """ - cdef int err, brng_id - - err = irk_leapfrog_stream_mkl(self.internal_state, k, nstreams); - - if err == -1: - raise ValueError('The stream state buffer is corrupted') - elif err == 1: - with self.lock: - brng_id = irk_get_brng_mkl(self.internal_state) - raise ValueError("Leap-frog method of stream initialization is not supported for " + str(_brng_id_to_name(brng_id))) - - def skipahead(self, long long int nskips): - """ - skipahead(nskips) - - Initializes the current state generator using skip-ahead method, - if supported for the basic random pseudo-random number generation - algorithm. - """ - cdef int err, brng_id - - err = irk_skipahead_stream_mkl(self.internal_state, nskips); - - if err == -1: - raise ValueError('The stream state buffer is corrupted') - elif err == 1: - with self.lock: - brng_id = irk_get_brng_mkl(self.internal_state) - raise ValueError("Skip-ahead method of stream initialization is not supported for " + str(_brng_id_to_name(brng_id))) - # Basic distributions: def random_sample(self, size=None): """ @@ -1372,51 +1310,6 @@ cdef class RandomState: """ return vec_cont0_array(self.internal_state, irk_double_vec, size, self.lock) - def tomaxint(self, size=None): - """ - tomaxint(size=None) - - Return a sample of uniformly distributed random integers in the interval - [0, ``np.iinfo("long").max``]. - - Parameters - ---------- - size : int or tuple of ints, optional - Output shape. If the given shape is, e.g., ``(m, n, k)``, then - ``m * n * k`` samples are drawn. Default is None, in which case a - single value is returned. - - Returns - ------- - out : ndarray - Drawn samples, with shape `size`. - - See Also - -------- - randint : Uniform sampling over a given half-open interval of integers. - random_integers : Uniform sampling over a given closed interval of - integers. - - Examples - -------- - >>> RS = np.random_random_intel.RandomState() # need a RandomState object - >>> RS.tomaxint((2,2,2)) - array([[[1170048599, 1600360186], - [ 739731006, 1947757578]], - [[1871712945, 752307660], - [1601631370, 1479324245]]]) - >>> import sys - >>> sys.maxint - 2147483647 - >>> RS.tomaxint((2,2,2)) < sys.maxint - array([[[ True, True], - [ True, True]], - [[ True, True], - [ True, True]]], dtype=bool) - - """ - return vec_long_disc0_array(self.internal_state, irk_long_vec, size, self.lock) - # Set up dictionary of integer types and relevant functions. # # The dictionary is keyed by dtype(...).name and the values @@ -1763,135 +1656,42 @@ cdef class RandomState: return ret - - def randint_untyped(self, low, high=None, size=None): + def bytes(self, cnp.npy_intp length): """ - randint_untyped(low, high=None, size=None, dtype=int) - - Return random integers from `low` (inclusive) to `high` (exclusive). + bytes(length) - Return random integers from the "discrete uniform" distribution of - the specified dtype in the "half-open" interval [`low`, `high`). If - `high` is None (the default), then results are from [0, `low`). + Return random bytes. Parameters ---------- - low : int - Lowest (signed) integer to be drawn from the distribution (unless - ``high=None``, in which case this parameter is the *highest* such - integer). - high : int, optional - If provided, one above the largest (signed) integer to be drawn - from the distribution (see above for behavior if ``high=None``). - size : int or tuple of ints, optional - Output shape. If the given shape is, e.g., ``(m, n, k)``, then - ``m * n * k`` samples are drawn. Default is None, in which case a - single value is returned. + length : int + Number of random bytes. Returns ------- - out : int or ndarray of ints - `size`-shaped array of random integers from the appropriate - distribution, or a single such random int if `size` not provided. - - See Also - -------- - random.random_integers : similar to `randint`, only for the closed - interval [`low`, `high`], and 1 is the lowest value if `high` is - omitted. In particular, this other one is the one to use to generate - uniformly distributed discrete non-integers. + out : str + String of length `length`. Examples -------- - >>> mkl_random.randint(2, size=10) - array([1, 0, 0, 0, 1, 1, 0, 0, 1, 0]) - >>> mkl_random.randint(1, size=10) - array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + >>> mkl_random.bytes(10) + ' eh\\x85\\x022SZ\\xbf\\xa4' #random - Generate a 2 x 4 array of ints between 0 and 4, inclusive: + """ + cdef void *bytes + bytestring = empty_py_bytes(length, &bytes) + with self.lock, nogil: + irk_fill(bytes, length, self.internal_state) + return bytestring - >>> mkl_random.randint(5, size=(2, 4)) - array([[4, 0, 2, 1], - [3, 2, 2, 0]]) + def choice(self, a, size=None, replace=True, p=None): """ - cdef long lo, hi - cdef long *array_long_data - cdef int * array_int_data - cdef cnp.ndarray array "arrayObject" - cdef cnp.npy_intp length - cdef int rv_int - cdef long rv_long + choice(a, size=None, replace=True, p=None) - if high is None: - lo = 0 - hi = low - else: - lo = low - hi = high + Generates a random sample from a given 1-D array - if lo >= hi : - raise ValueError("low >= high") - - if (( lo) == lo) and ((hi) == hi): - if size is None: - irk_discrete_uniform_vec(self.internal_state, 1, &rv_int, lo, hi) - return rv_int - else: - array = np.empty(size, np.int32) - length = cnp.PyArray_SIZE(array) - array_int_data = cnp.PyArray_DATA(array) - with self.lock, nogil: - irk_discrete_uniform_vec(self.internal_state, length, array_int_data, lo, hi) - return array - else: - if size is None: - irk_discrete_uniform_long_vec(self.internal_state, 1, &rv_long, lo, hi) - return rv_long - else: - array = np.empty(size, int) - length = cnp.PyArray_SIZE(array) - array_long_data = cnp.PyArray_DATA(array) - with self.lock, nogil: - irk_discrete_uniform_long_vec(self.internal_state, length, array_long_data, lo, hi) - return array - - def bytes(self, cnp.npy_intp length): - """ - bytes(length) - - Return random bytes. - - Parameters - ---------- - length : int - Number of random bytes. - - Returns - ------- - out : str - String of length `length`. - - Examples - -------- - >>> mkl_random.bytes(10) - ' eh\\x85\\x022SZ\\xbf\\xa4' #random - - """ - cdef void *bytes - bytestring = empty_py_bytes(length, &bytes) - with self.lock, nogil: - irk_fill(bytes, length, self.internal_state) - return bytestring - - - def choice(self, a, size=None, replace=True, p=None): - """ - choice(a, size=None, replace=True, p=None) - - Generates a random sample from a given 1-D array - - .. versionadded:: 1.7.0 + .. versionadded:: 1.7.0 Parameters ----------- @@ -4634,7 +4434,6 @@ cdef class RandomState: return vec_discnp_array_sc(self.internal_state, irk_binomial_vec, size, ln, fp, self.lock) - PyErr_Clear() on = cnp.PyArray_FROM_OTF(n, cnp.NPY_LONG, cnp.NPY_ARRAY_IN_ARRAY) @@ -4818,11 +4617,13 @@ cdef class RandomState: """ cdef cnp.ndarray olam cdef double flam + poisson_lam_max = np.iinfo('l').max - np.sqrt(np.iinfo('l').max)*10 + flam = PyFloat_AsDouble(lam) if not PyErr_Occurred(): if lam < 0: raise ValueError("lam < 0") - if lam > self.poisson_lam_max: + if lam > poisson_lam_max: raise ValueError("lam value too large") method = choose_method(method, [POISNORM, PTPE], _method_alias_dict_poisson); if method is POISNORM: @@ -4835,7 +4636,7 @@ cdef class RandomState: olam = cnp.PyArray_FROM_OTF(lam, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_IN_ARRAY) if np.any(np.less(olam, 0)): raise ValueError("lam < 0") - if np.any(np.greater(olam, self.poisson_lam_max)): + if np.any(np.greater(olam, poisson_lam_max)): raise ValueError("lam value too large.") method = choose_method(method, [POISNORM, PTPE], _method_alias_dict_poisson); if method is POISNORM: @@ -5229,9 +5030,9 @@ cdef class RandomState: self.lock) # Multivariate distributions: - def multivariate_normal(self, mean, cov, size=None): + def multivariate_normal(self, mean, cov, size=None, check_valid="warn", tol=1e-8): """ - multivariate_normal(mean, cov[, size]) + multivariate_normal(mean, cov[, size, check_valid, tol]) Draw random samples from a multivariate normal distribution. @@ -5365,181 +5166,30 @@ cdef class RandomState: # not zero. We continue to use the SVD rather than Cholesky in # order to preserve current outputs. Note that symmetry has not # been checked. + + # ensure double to make tol meaningful + cov = cov.astype(np.double) (u, s, v) = svd(cov) - neg = (np.sum(u.T * v, axis=1) < 0) & (s > 0) - if np.any(neg): - s[neg] = 0. - warnings.warn("covariance is not positive-semidefinite.", - RuntimeWarning) + + if check_valid != "ignore": + if check_valid != "warn" and check_valid != "raise": + raise ValueError( + "check_valid must equal 'warn', 'raise', or 'ignore'") + + psd = np.allclose(np.dot(v.T * s, v), cov, rtol=tol, atol=tol) + if not psd: + if check_valid == "warn": + warnings.warn("covariance is not symmetric positive-semidefinite.", + RuntimeWarning) + else: + raise ValueError( + "covariance is not symmetric positive-semidefinite.") x = np.dot(x, np.sqrt(s)[:, None] * v) x += mean x.shape = tuple(final_shape) return x - def multinormal_cholesky(self, mean, ch, size=None, method=ICDF): - """ - multivariate_normal(mean, ch, size=None, method='ICDF') - - Draw random samples from a multivariate normal distribution. - - The multivariate normal, multinormal or Gaussian distribution is a - generalization of the one-dimensional normal distribution to higher - dimensions. Such a distribution is specified by its mean and - covariance matrix, specified by its lower triangular Cholesky factor. - These parameters are analogous to the mean - (average or "center") and standard deviation, or "width," - of the one-dimensional normal distribution. - - Parameters - ---------- - mean : 1-D array_like, of length N - Mean of the N-dimensional distribution. - ch : 2-D array_like, of shape (N, N) - Cholesky factor of the covariance matrix of the distribution. Only lower-triangular - part of the matrix is actually used. - size : int or tuple of ints, optional - Given a shape of, for example, ``(m,n,k)``, ``m*n*k`` samples are - generated, and packed in an `m`-by-`n`-by-`k` arrangement. Because - each sample is `N`-dimensional, the output shape is ``(m,n,k,N)``. - If no shape is specified, a single (`N`-D) sample is returned. - method : 'ICDF, 'BoxMuller', 'BoxMuller2', optional - Sampling method used by Intel MKL. Can also be specified using - tokens mkl_random.ICDF, mkl_random.BOXMULLER, mkl_random.BOXMULLER2 - - Returns - ------- - out : ndarray - The drawn samples, of shape *size*, if that was provided. If not, - the shape is ``(N,)``. - - In other words, each entry ``out[i,j,...,:]`` is an N-dimensional - value drawn from the distribution. - - Notes - ----- - The mean is a coordinate in N-dimensional space, which represents the - location where samples are most likely to be generated. This is - analogous to the peak of the bell curve for the one-dimensional or - univariate normal distribution. - - Covariance indicates the level to which two variables vary together. - From the multivariate normal distribution, we draw N-dimensional - samples, :math:`X = [x_1, x_2, ... x_N]`. The covariance matrix - element :math:`C_{ij}` is the covariance of :math:`x_i` and :math:`x_j`. - The element :math:`C_{ii}` is the variance of :math:`x_i` (i.e. its - "spread"). - - Instead of specifying the full covariance matrix, popular - approximations include: - - - Spherical covariance (*cov* is a multiple of the identity matrix) - - Diagonal covariance (*cov* has non-negative elements, and only on - the diagonal) - - This geometrical property can be seen in two dimensions by plotting - generated data-points: - - >>> mean = [0, 0] - >>> cov = [[1, 0], [0, 100]] # diagonal covariance - - Diagonal covariance means that points are oriented along x or y-axis: - - >>> import matplotlib.pyplot as plt - >>> x, y = mkl_random.multivariate_normal(mean, cov, 5000).T - >>> plt.plot(x, y, 'x') - >>> plt.axis('equal') - >>> plt.show() - - Note that the covariance matrix must be positive semidefinite (a.k.a. - nonnegative-definite). Otherwise, the behavior of this method is - undefined and backwards compatibility is not guaranteed. - - References - ---------- - .. [1] Papoulis, A., "Probability, Random Variables, and Stochastic - Processes," 3rd ed., New York: McGraw-Hill, 1991. - .. [2] Duda, R. O., Hart, P. E., and Stork, D. G., "Pattern - Classification," 2nd ed., New York: Wiley, 2001. - - Examples - -------- - >>> mean = (1, 2) - >>> cov = [[1, 0], [0, 1]] - >>> x = mkl_random.multivariate_normal(mean, cov, (3, 3)) - >>> x.shape - (3, 3, 2) - - The following is probably true, given that 0.6 is roughly twice the - standard deviation: - - >>> list((x[0,0,:] - mean) < 0.6) - [True, True] - - """ - cdef cnp.ndarray resarr "arrayObject_resarr" - cdef cnp.ndarray marr "arrayObject_marr" - cdef cnp.ndarray tarr "arrayObject_tarr" - cdef double *res_data - cdef double *mean_data - cdef double *t_data - cdef cnp.npy_intp dim, n - cdef ch_st_enum storage_mode - - # Check preconditions on arguments - marr = cnp.PyArray_FROM_OTF(mean, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_IN_ARRAY) - tarr = cnp.PyArray_FROM_OTF(ch, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_IN_ARRAY) - - if size is None: - shape = [] - elif isinstance(size, (int, np.integer)): - shape = [size] - else: - shape = size - - if marr.ndim != 1: - raise ValueError("mean must be 1 dimensional") - dim = marr.shape[0]; - if (tarr.ndim == 2): - storage_mode = MATRIX - if (tarr.shape[0] != tarr.shape[1]): - raise ValueError("ch must be a square lower triangular 2-dimensional array or a row-packed one-dimensional representation of such") - if dim != tarr.shape[0]: - raise ValueError("mean and ch must have consistent shapes") - elif (tarr.ndim == 1): - if (tarr.shape[0] == dim): - storage_mode = DIAGONAL - elif (tarr.shape[0] == packed_cholesky_size(dim)): - storage_mode = PACKED - else: - raise ValueError("ch must be a square lower triangular 2-dimensional array or a row-packed one-dimensional representation of such") - else: - raise ValueError("ch must be a square lower triangular 2-dimensional array or a row-packed one-dimensional representation of such") - - # Compute shape of output and create a matrix of independent - # standard normally distributed random numbers. The matrix has rows - # with the same length as mean and as many rows are necessary to - # form a matrix of shape final_shape. - final_shape = list(shape[:]) - final_shape.append(int(dim)) - - resarr = np.empty(final_shape, np.float64) - res_data = cnp.PyArray_DATA(resarr) - mean_data = cnp.PyArray_DATA(marr) - t_data = cnp.PyArray_DATA(tarr) - - n = cnp.PyArray_SIZE(resarr) // dim - - method = choose_method(method, [ICDF, BOXMULLER2, BOXMULLER], _method_alias_dict_gaussian) - if (method is ICDF): - irk_multinormal_vec_ICDF(self.internal_state, n, res_data, dim, mean_data, t_data, storage_mode) - elif (method is BOXMULLER2): - irk_multinormal_vec_BM2(self.internal_state, n, res_data, dim, mean_data, t_data, storage_mode) - else: - irk_multinormal_vec_BM1(self.internal_state, n, res_data, dim, mean_data, t_data, storage_mode) - - return resarr - def multinomial(self, int n, object pvals, size=None): """ multinomial(n, pvals, size=None, method='ICDF') @@ -5904,18 +5554,446 @@ cdef class RandomState: return arr -def __RandomState_ctor(): - """Return a RandomState instance. - This function exists solely to assist (un)pickling. - Note that the state of the RandomState returned here is irrelevant, as this function's - entire purpose is to return a newly allocated RandomState whose state pickle can set. - Consequently the RandomState returned by this function is a freshly allocated copy - with a seed=0. - See https://github.com/numpy/numpy/issues/4763 for a detailed discussion +cdef class MKLRandomState(_MKLRandomState): """ - return RandomState(seed=0) + MKLRandomState(seed=None, brng='MT19937') + + Container for the Intel(R) MKL-powered (pseudo-)random number generators. + + `MKLRandomState` exposes a number of methods for generating random numbers + drawn from a variety of probability distributions. In addition to the + distribution-specific arguments, each method takes a keyword argument + `size` that defaults to ``None``. If `size` is ``None``, then a single + value is generated and returned. If `size` is an integer, then a 1-D + array filled with generated values is returned. If `size` is a tuple, + then an array with that shape is filled and returned. + + *Compatibility Notice* + While this class shares some similarities with the original `RandomState`, + it has been rewritten to use MKL's vector statistics functionality, that + provides efficient implementation of the MT19937 and many other basic + psuedo-random number generation algorithms as well as efficient sampling + from other common statistical distributions. As a consequence this version + is NOT seed-compatible with the original `RandomState`. + + Parameters + ---------- + seed : {None, int, array_like}, optional + Random seed initializing the pseudo-random number generator. + Can be an integer, an array (or other sequence) of integers of + any length, or ``None`` (the default). + If `seed` is ``None``, then `RandomState` will try to read data from + ``/dev/urandom`` (or the Windows analogue) if available or seed from + the clock otherwise. + brng : {'MT19937', 'SFMT19937', 'MT2203', 'R250', 'WH', 'MCG31', 'MCG59', + 'MRG32K3A', 'PHILOX4X32X10', 'NONDETERM', 'ARS5'}, optional + basic pseudo-random number generation algorithms, or non-deterministic + hardware-based generator, provided by Intel MKL. The default choice is + 'MT19937' - the Mersenne Twister generator. + + Notes + ----- + The Python stdlib module "random" also contains a Mersenne Twister + pseudo-random number generator with a number of methods that are similar + to the ones available in `MKLRandomState`. `MKLRandomState`, besides being + NumPy-aware, has the advantage that it provides a much larger number + of probability distributions to choose from. + + References + ----- + MKL Documentation: https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl.html + + """ + + def leapfrog(self, int k, int nstreams): + """ + leapfrog(k, nstreams) + + Initializes the current state generator using leap-frog method, + if supported for the basic random pseudo-random number generation + algorithm. + """ + cdef int err, brng_id + + err = irk_leapfrog_stream_mkl(self.internal_state, k, nstreams); + + if err == -1: + raise ValueError('The stream state buffer is corrupted') + elif err == 1: + with self.lock: + brng_id = irk_get_brng_mkl(self.internal_state) + raise ValueError("Leap-frog method of stream initialization is not supported for " + str(_brng_id_to_name(brng_id))) + + def skipahead(self, long long int nskips): + """ + skipahead(nskips) + + Initializes the current state generator using skip-ahead method, + if supported for the basic random pseudo-random number generation + algorithm. + """ + cdef int err, brng_id + + err = irk_skipahead_stream_mkl(self.internal_state, nskips); + + if err == -1: + raise ValueError('The stream state buffer is corrupted') + elif err == 1: + with self.lock: + brng_id = irk_get_brng_mkl(self.internal_state) + raise ValueError("Skip-ahead method of stream initialization is not supported for " + str(_brng_id_to_name(brng_id))) + + def tomaxint(self, size=None): + """ + tomaxint(size=None) + + Return a sample of uniformly distributed random integers in the interval + [0, ``np.iinfo("long").max``]. + + Parameters + ---------- + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. Default is None, in which case a + single value is returned. + + Returns + ------- + out : ndarray + Drawn samples, with shape `size`. + + See Also + -------- + randint : Uniform sampling over a given half-open interval of integers. + random_integers : Uniform sampling over a given closed interval of + integers. + + Examples + -------- + >>> RS = mkl_random.MKLRandomState() # need a MKLRandomState object + >>> RS.tomaxint((2,2,2)) + array([[[1170048599, 1600360186], + [ 739731006, 1947757578]], + [[1871712945, 752307660], + [1601631370, 1479324245]]]) + >>> import sys + >>> sys.maxint + 2147483647 + >>> RS.tomaxint((2,2,2)) < sys.maxint + array([[[ True, True], + [ True, True]], + [[ True, True], + [ True, True]]], dtype=bool) + + """ + return vec_long_disc0_array(self.internal_state, irk_long_vec, size, self.lock) + + def randint_untyped(self, low, high=None, size=None): + """ + randint_untyped(low, high=None, size=None, dtype=int) + + Return random integers from `low` (inclusive) to `high` (exclusive). + + Return random integers from the "discrete uniform" distribution of + the specified dtype in the "half-open" interval [`low`, `high`). If + `high` is None (the default), then results are from [0, `low`). + + Parameters + ---------- + low : int + Lowest (signed) integer to be drawn from the distribution (unless + ``high=None``, in which case this parameter is the *highest* such + integer). + high : int, optional + If provided, one above the largest (signed) integer to be drawn + from the distribution (see above for behavior if ``high=None``). + size : int or tuple of ints, optional + Output shape. If the given shape is, e.g., ``(m, n, k)``, then + ``m * n * k`` samples are drawn. Default is None, in which case a + single value is returned. + + Returns + ------- + out : int or ndarray of ints + `size`-shaped array of random integers from the appropriate + distribution, or a single such random int if `size` not provided. + + See Also + -------- + random.random_integers : similar to `randint`, only for the closed + interval [`low`, `high`], and 1 is the lowest value if `high` is + omitted. In particular, this other one is the one to use to generate + uniformly distributed discrete non-integers. + + Examples + -------- + >>> mkl_random.randint(2, size=10) + array([1, 0, 0, 0, 1, 1, 0, 0, 1, 0]) + >>> mkl_random.randint(1, size=10) + array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) + + Generate a 2 x 4 array of ints between 0 and 4, inclusive: + + >>> mkl_random.randint(5, size=(2, 4)) + array([[4, 0, 2, 1], + [3, 2, 2, 0]]) + + """ + cdef long lo, hi + cdef long *array_long_data + cdef int * array_int_data + cdef cnp.ndarray array "arrayObject" + cdef cnp.npy_intp length + cdef int rv_int + cdef long rv_long + + if high is None: + lo = 0 + hi = low + else: + lo = low + hi = high + + if lo >= hi : + raise ValueError("low >= high") + + if (( lo) == lo) and ((hi) == hi): + if size is None: + irk_discrete_uniform_vec(self.internal_state, 1, &rv_int, lo, hi) + return rv_int + else: + array = np.empty(size, np.int32) + length = cnp.PyArray_SIZE(array) + array_int_data = cnp.PyArray_DATA(array) + with self.lock, nogil: + irk_discrete_uniform_vec(self.internal_state, length, array_int_data, lo, hi) + return array + else: + if size is None: + irk_discrete_uniform_long_vec(self.internal_state, 1, &rv_long, lo, hi) + return rv_long + else: + array = np.empty(size, int) + length = cnp.PyArray_SIZE(array) + array_long_data = cnp.PyArray_DATA(array) + with self.lock, nogil: + irk_discrete_uniform_long_vec(self.internal_state, length, array_long_data, lo, hi) + return array + + def multinormal_cholesky(self, mean, ch, size=None, method=ICDF): + """ + multivariate_normal(mean, ch, size=None, method='ICDF') + + Draw random samples from a multivariate normal distribution. + + The multivariate normal, multinormal or Gaussian distribution is a + generalization of the one-dimensional normal distribution to higher + dimensions. Such a distribution is specified by its mean and + covariance matrix, specified by its lower triangular Cholesky factor. + These parameters are analogous to the mean + (average or "center") and standard deviation, or "width," + of the one-dimensional normal distribution. + + Parameters + ---------- + mean : 1-D array_like, of length N + Mean of the N-dimensional distribution. + ch : 2-D array_like, of shape (N, N) + Cholesky factor of the covariance matrix of the distribution. Only lower-triangular + part of the matrix is actually used. + size : int or tuple of ints, optional + Given a shape of, for example, ``(m,n,k)``, ``m*n*k`` samples are + generated, and packed in an `m`-by-`n`-by-`k` arrangement. Because + each sample is `N`-dimensional, the output shape is ``(m,n,k,N)``. + If no shape is specified, a single (`N`-D) sample is returned. + method : 'ICDF, 'BoxMuller', 'BoxMuller2', optional + Sampling method used by Intel MKL. Can also be specified using + tokens mkl_random.ICDF, mkl_random.BOXMULLER, mkl_random.BOXMULLER2 + + Returns + ------- + out : ndarray + The drawn samples, of shape *size*, if that was provided. If not, + the shape is ``(N,)``. + + In other words, each entry ``out[i,j,...,:]`` is an N-dimensional + value drawn from the distribution. + + Notes + ----- + The mean is a coordinate in N-dimensional space, which represents the + location where samples are most likely to be generated. This is + analogous to the peak of the bell curve for the one-dimensional or + univariate normal distribution. + + Covariance indicates the level to which two variables vary together. + From the multivariate normal distribution, we draw N-dimensional + samples, :math:`X = [x_1, x_2, ... x_N]`. The covariance matrix + element :math:`C_{ij}` is the covariance of :math:`x_i` and :math:`x_j`. + The element :math:`C_{ii}` is the variance of :math:`x_i` (i.e. its + "spread"). + + Instead of specifying the full covariance matrix, popular + approximations include: + + - Spherical covariance (*cov* is a multiple of the identity matrix) + - Diagonal covariance (*cov* has non-negative elements, and only on + the diagonal) + + This geometrical property can be seen in two dimensions by plotting + generated data-points: + + >>> mean = [0, 0] + >>> cov = [[1, 0], [0, 100]] # diagonal covariance + + Diagonal covariance means that points are oriented along x or y-axis: + + >>> import matplotlib.pyplot as plt + >>> x, y = mkl_random.multivariate_normal(mean, cov, 5000).T + >>> plt.plot(x, y, 'x') + >>> plt.axis('equal') + >>> plt.show() + + Note that the covariance matrix must be positive semidefinite (a.k.a. + nonnegative-definite). Otherwise, the behavior of this method is + undefined and backwards compatibility is not guaranteed. + + References + ---------- + .. [1] Papoulis, A., "Probability, Random Variables, and Stochastic + Processes," 3rd ed., New York: McGraw-Hill, 1991. + .. [2] Duda, R. O., Hart, P. E., and Stork, D. G., "Pattern + Classification," 2nd ed., New York: Wiley, 2001. + + Examples + -------- + >>> mean = (1, 2) + >>> cov = [[1, 0], [0, 1]] + >>> x = mkl_random.multivariate_normal(mean, cov, (3, 3)) + >>> x.shape + (3, 3, 2) + + The following is probably true, given that 0.6 is roughly twice the + standard deviation: + + >>> list((x[0,0,:] - mean) < 0.6) + [True, True] + + """ + cdef cnp.ndarray resarr "arrayObject_resarr" + cdef cnp.ndarray marr "arrayObject_marr" + cdef cnp.ndarray tarr "arrayObject_tarr" + cdef double *res_data + cdef double *mean_data + cdef double *t_data + cdef cnp.npy_intp dim, n + cdef ch_st_enum storage_mode + + # Check preconditions on arguments + marr = cnp.PyArray_FROM_OTF(mean, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_IN_ARRAY) + tarr = cnp.PyArray_FROM_OTF(ch, cnp.NPY_DOUBLE, cnp.NPY_ARRAY_IN_ARRAY) + + if size is None: + shape = [] + elif isinstance(size, (int, np.integer)): + shape = [size] + else: + shape = size + + if marr.ndim != 1: + raise ValueError("mean must be 1 dimensional") + dim = marr.shape[0]; + if (tarr.ndim == 2): + storage_mode = MATRIX + if (tarr.shape[0] != tarr.shape[1]): + raise ValueError("ch must be a square lower triangular 2-dimensional array or a row-packed one-dimensional representation of such") + if dim != tarr.shape[0]: + raise ValueError("mean and ch must have consistent shapes") + elif (tarr.ndim == 1): + if (tarr.shape[0] == dim): + storage_mode = DIAGONAL + elif (tarr.shape[0] == packed_cholesky_size(dim)): + storage_mode = PACKED + else: + raise ValueError("ch must be a square lower triangular 2-dimensional array or a row-packed one-dimensional representation of such") + else: + raise ValueError("ch must be a square lower triangular 2-dimensional array or a row-packed one-dimensional representation of such") + + # Compute shape of output and create a matrix of independent + # standard normally distributed random numbers. The matrix has rows + # with the same length as mean and as many rows are necessary to + # form a matrix of shape final_shape. + final_shape = list(shape[:]) + final_shape.append(int(dim)) + + resarr = np.empty(final_shape, np.float64) + res_data = cnp.PyArray_DATA(resarr) + mean_data = cnp.PyArray_DATA(marr) + t_data = cnp.PyArray_DATA(tarr) + + n = cnp.PyArray_SIZE(resarr) // dim + + method = choose_method(method, [ICDF, BOXMULLER2, BOXMULLER], _method_alias_dict_gaussian) + if (method is ICDF): + irk_multinormal_vec_ICDF(self.internal_state, n, res_data, dim, mean_data, t_data, storage_mode) + elif (method is BOXMULLER2): + irk_multinormal_vec_BM2(self.internal_state, n, res_data, dim, mean_data, t_data, storage_mode) + else: + irk_multinormal_vec_BM1(self.internal_state, n, res_data, dim, mean_data, t_data, storage_mode) + + return resarr + + +class RandomState(MKLRandomState): + """ + RandomState(seed=None, brng='MT19937') + + Container for the Intel(R) MKL-powered (pseudo-)random number generators. + + Maintains the former name `RandomState` for compatibility with legacy + code, pending transition to `MKLRandomState`. + """ + def __init__(self, seed=None, brng="MT19937"): + # deprecation warning for the RandomState class + warnings.warn( + "RandomState is deprecated since mkl_random 1.3.2 and will be " + "removed in a future release. The drop-in replacement for " + "RandomState is now located in mkl_random.interfaces.numpy_random. " + "MKL-specific random number generation functionality is available " + "in MKLRandomState.", + DeprecationWarning, + stacklevel=2, + ) + super().__init__(seed=seed, brng=brng) + + +def __MKLRandomState_ctor(): + """ + Return a MKLRandomState instance. + This function exists solely to assist (un)pickling. + Note that the state of the MKLRandomState returned here is irrelevant, as this function's + entire purpose is to return a newly allocated MKLRandomState whose state pickle can set. + Consequently the MKLRandomState returned by this function is a freshly allocated copy + with a seed=0. + See https://github.com/numpy/numpy/issues/4763 for a detailed discussion + """ + return MKLRandomState(seed=0) + + +def __RandomState_ctor(): + """ + Return a RandomState instance. + This function exists solely to assist (un)pickling. + Note that the state of the RandomState returned here is irrelevant, as this function's + entire purpose is to return a newly allocated RandomState whose state pickle can set. + Consequently the RandomState returned by this function is a freshly allocated copy + with a seed=0. + See https://github.com/numpy/numpy/issues/4763 for a detailed discussion + """ + return RandomState(seed=0) + -_rand = RandomState() +_rand = MKLRandomState() seed = _rand.seed get_state = _rand.get_state set_state = _rand.set_state diff --git a/mkl_random/tests/test_random.py b/mkl_random/tests/test_random.py index 8087466..f961ac2 100644 --- a/mkl_random/tests/test_random.py +++ b/mkl_random/tests/test_random.py @@ -30,7 +30,7 @@ import mkl_random as rnd from numpy.testing import ( assert_, assert_raises, assert_equal, - assert_warns, suppress_warnings) + suppress_warnings, assert_no_warnings) import sys import warnings @@ -39,55 +39,56 @@ def test_zero_scalar_seed(): evs_zero_seed = { - 'MT19937' : 844, 'SFMT19937' : 857, - 'WH' : 0, 'MT2203' : 890, - 'MCG31' : 0, 'R250' : 229, - 'MRG32K3A' : 0, 'MCG59' : 0 } + 'MT19937': 844, 'SFMT19937': 857, + 'WH': 0, 'MT2203': 890, + 'MCG31': 0, 'R250': 229, + 'MRG32K3A': 0, 'MCG59': 0} for brng_algo in evs_zero_seed: - s = rnd.RandomState(0, brng = brng_algo) + s = rnd.MKLRandomState(0, brng=brng_algo) assert_equal(s.get_state()[0], brng_algo) assert_equal(s.randint(1000), evs_zero_seed[brng_algo]) + def test_max_scalar_seed(): evs_max_seed = { - 'MT19937' : 635, 'SFMT19937' : 25, - 'WH' : 100, 'MT2203' : 527, - 'MCG31' : 0, 'R250' : 229, - 'MRG32K3A' : 961, 'MCG59' : 0 } + 'MT19937': 635, 'SFMT19937': 25, + 'WH': 100, 'MT2203': 527, + 'MCG31': 0, 'R250': 229, + 'MRG32K3A': 961, 'MCG59': 0} for brng_algo in evs_max_seed: - s = rnd.RandomState(4294967295, brng = brng_algo) + s = rnd.MKLRandomState(4294967295, brng=brng_algo) assert_equal(s.get_state()[0], brng_algo) assert_equal(s.randint(1000), evs_max_seed[brng_algo]) def test_array_seed(): - s = rnd.RandomState(range(10), brng='MT19937') + s = rnd.MKLRandomState(range(10), brng='MT19937') assert_equal(s.randint(1000), 410) - s = rnd.RandomState(np.arange(10), brng='MT19937') + s = rnd.MKLRandomState(np.arange(10), brng='MT19937') assert_equal(s.randint(1000), 410) - s = rnd.RandomState([0], brng='MT19937') + s = rnd.MKLRandomState([0], brng='MT19937') assert_equal(s.randint(1000), 844) - s = rnd.RandomState([4294967295], brng='MT19937') + s = rnd.MKLRandomState([4294967295], brng='MT19937') assert_equal(s.randint(1000), 635) def test_invalid_scalar_seed(): # seed must be an unsigned 32 bit integers - pytest.raises(TypeError, rnd.RandomState, -0.5) - pytest.raises(ValueError, rnd.RandomState, -1) + pytest.raises(TypeError, rnd.MKLRandomState, -0.5) + pytest.raises(ValueError, rnd.MKLRandomState, -1) def test_invalid_array_seed(): # seed must be an unsigned 32 bit integers - pytest.raises(TypeError, rnd.RandomState, [-0.5]) - pytest.raises(ValueError, rnd.RandomState, [-1]) - pytest.raises(ValueError, rnd.RandomState, [4294967296]) - pytest.raises(ValueError, rnd.RandomState, [1, 2, 4294967296]) - pytest.raises(ValueError, rnd.RandomState, [1, -2, 4294967296]) + pytest.raises(TypeError, rnd.MKLRandomState, [-0.5]) + pytest.raises(ValueError, rnd.MKLRandomState, [-1]) + pytest.raises(ValueError, rnd.MKLRandomState, [4294967296]) + pytest.raises(ValueError, rnd.MKLRandomState, [1, 2, 4294967296]) + pytest.raises(ValueError, rnd.MKLRandomState, [1, -2, 4294967296]) def test_non_deterministic_brng(): - rs = rnd.RandomState(brng='nondeterministic') + rs = rnd.MKLRandomState(brng='nondeterministic') v = rs.rand(10) assert isinstance(v, np.ndarray) v = rs.randint(0, 10) @@ -130,11 +131,10 @@ def test_size(): assert_equal(rnd.multinomial(1, p, np.uint32(1)).shape, (1, 2)) assert_equal(rnd.multinomial(1, p, [2, 2]).shape, (2, 2, 2)) assert_equal(rnd.multinomial(1, p, (2, 2)).shape, (2, 2, 2)) - assert_equal(rnd.multinomial(1, p, np.array((2, 2))).shape, - (2, 2, 2)) + assert_equal(rnd.multinomial(1, p, np.array((2, 2))).shape, (2, 2, 2)) + + pytest.raises(TypeError, rnd.multinomial, 1, p, np.float64(1)) - pytest.raises(TypeError, rnd.multinomial, 1, p, - np.float64(1)) class RngState(NamedTuple): seed: int @@ -145,14 +145,14 @@ class RngState(NamedTuple): @pytest.fixture def rng_state(): seed = 1234567890 - prng = rnd.RandomState(seed) + prng = rnd.MKLRandomState(seed) state = prng.get_state() return RngState(seed, prng, state) def test_set_state_basic(rng_state): sample_ref = rng_state.prng.tomaxint(16) - new_rng = rnd.RandomState() + new_rng = rnd.MKLRandomState() new_rng.set_state(rng_state.state) sample_from_new = new_rng.tomaxint(16) assert_equal(sample_ref, sample_from_new) @@ -161,7 +161,7 @@ def test_set_state_basic(rng_state): def test_set_state_gaussian_reset(rng_state): # Make sure the cached every-other-Gaussian is reset. sample_ref = rng_state.prng.standard_normal(size=3) - new_rng = rnd.RandomState() + new_rng = rnd.MKLRandomState() new_rng.set_state(rng_state.state) sample_from_new = new_rng.standard_normal(size=3) assert_equal(sample_ref, sample_from_new) @@ -174,7 +174,7 @@ def test_set_state_gaussian_reset_in_media_res(rng_state): _ = prng.standard_normal() state_after_draw = prng.get_state() sample_ref = prng.standard_normal(size=3) - new_rng = rnd.RandomState() + new_rng = rnd.MKLRandomState() new_rng.set_state(state_after_draw) sample_from_new = new_rng.standard_normal(size=3) assert_equal(sample_ref, sample_from_new) @@ -186,7 +186,7 @@ def test_set_state_backward_compatibility(rng_state): if len(rng_state.state) == 5: state_old_format = rng_state.state[:-2] x1 = rng_state.prng.standard_normal(size=16) - new_rng = rnd.RandomState() + new_rng = rnd.MKLRandomState() new_rng.set_state(state_old_format) x2 = new_rng.standard_normal(size=16) new_rng.set_state(rng_state.state) @@ -203,8 +203,8 @@ def test_set_state_negative_binomial(rng_state): class RandIntData(NamedTuple): - rfunc : object - itype : list + rfunc: object + itype: list @pytest.fixture @@ -262,14 +262,14 @@ def test_randint_repeatability(randint): # in the range [0, 6) for all but np.bool, where the range # is [0, 2). Hashes are for little endian numbers. tgt = {'bool': '4fee98a6885457da67c39331a9ec336f', - 'int16': '80a5ff69c315ab6f80b03da1d570b656', - 'int32': '15a3c379b6c7b0f296b162194eab68bc', - 'int64': 'ea9875f9334c2775b00d4976b85a1458', - 'int8': '0f56333af47de94930c799806158a274', - 'uint16': '80a5ff69c315ab6f80b03da1d570b656', - 'uint32': '15a3c379b6c7b0f296b162194eab68bc', - 'uint64': 'ea9875f9334c2775b00d4976b85a1458', - 'uint8': '0f56333af47de94930c799806158a274'} + 'int16': '80a5ff69c315ab6f80b03da1d570b656', + 'int32': '15a3c379b6c7b0f296b162194eab68bc', + 'int64': 'ea9875f9334c2775b00d4976b85a1458', + 'int8': '0f56333af47de94930c799806158a274', + 'uint16': '80a5ff69c315ab6f80b03da1d570b656', + 'uint32': '15a3c379b6c7b0f296b162194eab68bc', + 'uint64': 'ea9875f9334c2775b00d4976b85a1458', + 'uint8': '0f56333af47de94930c799806158a274'} for dt in randint.itype[1:]: rnd.seed(1234, brng='MT19937') @@ -306,12 +306,12 @@ def test_randint_respect_dtype_singleton(randint): # gh-7284: Ensure that we get Python data types sample = randint.rfunc(lbnd, ubnd, dtype=dt) assert not hasattr(sample, 'dtype') - assert (type(sample) == dt) + assert (type(sample) is dt) class RandomDistData(NamedTuple): - seed : int - brng : str + seed: int + brng: str @pytest.fixture @@ -321,9 +321,10 @@ def randomdist(): # Make sure the random distribution returns the correct value for a # given seed. Low value of decimal argument is intended, since functional -# transformations's implementation or approximations thereof used to produce non-uniform -# random variates can vary across platforms, yet be statistically indistinguishable to the end user, -# that is no computationally feasible statistical experiment can detect the difference. +# transformations's implementation or approximations thereof used to produce +# non-uniform random variates can vary across platforms, yet be statistically +# indistinguishable to the end user, that is no computationally feasible +# statistical experiment can detect the difference. def test_randomdist_rand(randomdist): rnd.seed(randomdist.seed, brng=randomdist.brng) @@ -369,8 +370,7 @@ def test_random_integers_max_int(): # to generate this integer. with suppress_warnings() as sup: w = sup.record(DeprecationWarning) - actual = rnd.random_integers(np.iinfo('l').max, - np.iinfo('l').max) + actual = rnd.random_integers(np.iinfo('l').max, np.iinfo('l').max) assert len(w) == 1 desired = np.iinfo('l').max np.testing.assert_equal(actual, desired) @@ -381,14 +381,17 @@ def test_random_integers_deprecated(): warnings.simplefilter("error", DeprecationWarning) # DeprecationWarning raised with high == None - assert_raises(DeprecationWarning, - rnd.random_integers, - np.iinfo('l').max) + assert_raises( + DeprecationWarning, rnd.random_integers, np.iinfo('l').max + ) # DeprecationWarning raised with high != None - assert_raises(DeprecationWarning, - rnd.random_integers, - np.iinfo('l').max, np.iinfo('l').max) + assert_raises( + DeprecationWarning, + rnd.random_integers, + np.iinfo('l').max, + np.iinfo('l').max + ) def test_randomdist_random_sample(randomdist): @@ -414,13 +417,6 @@ def test_randomdist_choice_nonuniform_replace(randomdist): np.testing.assert_array_equal(actual, desired) -def test_randomdist_choice_nonuniform_replace(randomdist): - rnd.seed(randomdist.seed, brng=randomdist.brng) - actual = rnd.choice(4, 4, p=[0.4, 0.4, 0.1, 0.1]) - desired = np.array([3, 0, 0, 1]) - np.testing.assert_array_equal(actual, desired) - - def test_randomdist_choice_uniform_noreplace(randomdist): rnd.seed(randomdist.seed, brng=randomdist.brng) actual = rnd.choice(4, 3, replace=False) @@ -430,8 +426,7 @@ def test_randomdist_choice_uniform_noreplace(randomdist): def test_randomdist_choice_nonuniform_noreplace(randomdist): rnd.seed(randomdist.seed, brng=randomdist.brng) - actual = rnd.choice(4, 3, replace=False, - p=[0.1, 0.3, 0.5, 0.1]) + actual = rnd.choice(4, 3, replace=False, p=[0.1, 0.3, 0.5, 0.1]) desired = np.array([3, 0, 1]) np.testing.assert_array_equal(actual, desired) @@ -449,14 +444,16 @@ def test_choice_exceptions(): pytest.raises(ValueError, sample, 3., 3) pytest.raises(ValueError, sample, [[1, 2], [3, 4]], 3) pytest.raises(ValueError, sample, [], 3) - pytest.raises(ValueError, sample, [1, 2, 3, 4], 3, - p=[[0.25, 0.25], [0.25, 0.25]]) + pytest.raises( + ValueError, sample, [1, 2, 3, 4], 3, p=[[0.25, 0.25], [0.25, 0.25]] + ) pytest.raises(ValueError, sample, [1, 2], 3, p=[0.4, 0.4, 0.2]) pytest.raises(ValueError, sample, [1, 2], 3, p=[1.1, -0.1]) pytest.raises(ValueError, sample, [1, 2], 3, p=[0.4, 0.4]) pytest.raises(ValueError, sample, [1, 2, 3], 4, replace=False) - pytest.raises(ValueError, sample, [1, 2, 3], 2, replace=False, - p=[1, 0, 0]) + pytest.raises( + ValueError, sample, [1, 2, 3], 2, replace=False, p=[1, 0, 0] + ) def test_choice_return_shape(): @@ -507,18 +504,19 @@ def test_randomdist_shuffle(randomdist): # Test lists, arrays (of various dtypes), and multidimensional versions # of both, c-contiguous or not: for conv in [lambda x: np.array([]), - lambda x: x, - lambda x: np.asarray(x).astype(np.int8), - lambda x: np.asarray(x).astype(np.float32), - lambda x: np.asarray(x).astype(np.complex64), - lambda x: np.asarray(x).astype(object), - lambda x: [(i, i) for i in x], - lambda x: np.asarray([[i, i] for i in x]), - lambda x: np.vstack([x, x]).T, - # gh-4270 - lambda x: np.asarray([(i, i) for i in x], - [("a", object, (1,)), - ("b", np.int32, (1,))])]: + lambda x: x, + lambda x: np.asarray(x).astype(np.int8), + lambda x: np.asarray(x).astype(np.float32), + lambda x: np.asarray(x).astype(np.complex64), + lambda x: np.asarray(x).astype(object), + lambda x: [(i, i) for i in x], + lambda x: np.asarray([[i, i] for i in x]), + lambda x: np.vstack([x, x]).T, + # gh-4270 + lambda x: np.asarray( + [(i, i) for i in x], + [("a", object, (1,)), + ("b", np.int32, (1,))])]: rnd.seed(randomdist.seed, brng=randomdist.brng) alist = conv([1, 2, 3, 4, 5, 6, 7, 8, 9, 0]) rnd.shuffle(alist) @@ -529,7 +527,7 @@ def test_randomdist_shuffle(randomdist): def test_shuffle_masked(): # gh-3263 - a = np.ma.masked_values(np.reshape(range(20), (5,4)) % 3 - 1, -1) + a = np.ma.masked_values(np.reshape(range(20), (5, 4)) % 3 - 1, -1) b = np.ma.masked_values(np.arange(20) % 3 - 1, -1) a_orig = a.copy() b_orig = b.copy() @@ -563,8 +561,8 @@ def test_randomdist_chisquare(randomdist): rnd.seed(randomdist.seed, brng=randomdist.brng) actual = rnd.chisquare(50, size=(3, 2)) desired = np.array([[50.955833609920589, 50.133178918244099], - [61.513615847062013, 50.757127871422448], - [52.79816819717081, 49.973023331993552]]) + [61.513615847062013, 50.757127871422448], + [52.79816819717081, 49.973023331993552]]) np.testing.assert_allclose(actual, desired, atol=1e-7, rtol=1e-10) @@ -573,11 +571,11 @@ def test_randomdist_dirichlet(randomdist): alpha = np.array([51.72840233779265162, 39.74494232180943953]) actual = rnd.dirichlet(alpha, size=(3, 2)) desired = np.array([[[0.6332947001908874, 0.36670529980911254], - [0.5376828907571894, 0.4623171092428107]], + [0.5376828907571894, 0.4623171092428107]], [[0.6835615930093024, 0.3164384069906976], - [0.5452378139016114, 0.45476218609838875]], + [0.5452378139016114, 0.45476218609838875]], [[0.6498494402738553, 0.3501505597261446], - [0.5622024400324822, 0.43779755996751785]]]) + [0.5622024400324822, 0.43779755996751785]]]) np.testing.assert_allclose(actual, desired, atol=4e-10, rtol=4e-10) @@ -687,8 +685,9 @@ def test_randomdist_lognormal(randomdist): [0.1769118704670423, 3.415299544410577], [1.2417099625339398, 102.0631392685238]]) np.testing.assert_allclose(actual, desired, atol=1e-6, rtol=1e-10) - actual = rnd.lognormal(mean=.123456789, sigma=2.0, size=(3,2), - method='Box-Muller2') + actual = rnd.lognormal( + mean=.123456789, sigma=2.0, size=(3, 2), method='Box-Muller2' + ) desired = np.array([[0.2585388231094821, 0.43734953048924663], [26.050836228611697, 26.76266237820882], [0.24216420175675096, 0.2481945765083541]]) @@ -703,7 +702,7 @@ def test_randomdist_logseries(randomdist): def test_randomdist_multinomial(randomdist): - rs = rnd.RandomState(randomdist.seed, brng=randomdist.brng) + rs = rnd.MKLRandomState(randomdist.seed, brng=randomdist.brng) actual = rs.multinomial(20, [1/6.]*6, size=(3, 2)) desired = np.full((3, 2), 20, dtype=actual.dtype) np.testing.assert_array_equal(actual.sum(axis=-1), desired) @@ -720,24 +719,57 @@ def test_randomdist_multivariate_normal(randomdist): # Hmm... not even symmetric. cov = [[1, 0], [1, 0]] size = (3, 2) - actual = rnd.multivariate_normal(mean, cov, size) + # ignore RuntimeWarning from non-positive-semidefinite covariance + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) + actual = rnd.multivariate_normal(mean, cov, size) desired = np.array([[[-2.42282709811266, 10.0], - [1.2267795840027274, 10.0]], + [1.2267795840027274, 10.0]], [[0.06813924868067336, 10.0], - [1.001190462507746, 10.0]], + [1.001190462507746, 10.0]], [[-1.74157261455869, 10.0], - [1.0400952859037553, 10.0]]]) + [1.0400952859037553, 10.0]]]) np.testing.assert_allclose(actual, desired, atol=1e-10, rtol=1e-10) # Check for default size, was raising deprecation warning - actual = rnd.multivariate_normal(mean, cov) + # ignore RuntimeWarning from non-positive-semidefinite covariance + with warnings.catch_warnings(): + warnings.simplefilter("ignore", RuntimeWarning) + actual = rnd.multivariate_normal(mean, cov) desired = np.array([1.0579899448949994, 10.0]) np.testing.assert_allclose(actual, desired, atol=1e-10, rtol=1e-10) - # Check that non positive-semidefinite covariance raises warning + # Check that non positive-semidefinite covariance warns with + # RuntimeWarning mean = [0, 0] - cov = [[1, 1 + 1e-10], [1 + 1e-10, 1]] - assert_warns(RuntimeWarning, rnd.multivariate_normal, mean, cov) + cov = [[1, 2], [2, 1]] + pytest.warns(RuntimeWarning, rnd.multivariate_normal, mean, cov) + + # and that it doesn't warn with RuntimeWarning check_valid='ignore' + assert_no_warnings( + rnd.multivariate_normal, mean, cov, check_valid="ignore" + ) + + # and that it raises with RuntimeWarning check_valid='raises' + assert_raises( + ValueError, rnd.multivariate_normal, mean, cov, check_valid="raise" + ) + + cov = np.array([[1, 0.1], [0.1, 1]], dtype=np.float32) + with warnings.catch_warnings(): + warnings.simplefilter("error", RuntimeWarning) + rnd.multivariate_normal(mean, cov) + + mu = np.zeros(2) + cov = np.eye(2) + assert_raises( + ValueError, rnd.multivariate_normal, mean, cov, check_valid="other" + ) + assert_raises( + ValueError, rnd.multivariate_normal, np.zeros((2, 1, 1)), cov + ) + assert_raises(ValueError, rnd.multivariate_normal, mu, np.empty((3, 2))) + assert_raises(ValueError, rnd.multivariate_normal, mu, np.eye(3)) def test_randomdist_multinormal_cholesky(randomdist): @@ -748,11 +780,11 @@ def test_randomdist_multinormal_cholesky(randomdist): size = (3, 2) actual = rnd.multinormal_cholesky(mean, chol_mat, size, method='ICDF') desired = np.array([[[2.26461778189133, 6.857632824379853], - [-0.8043233941855025, 11.01629429884193]], + [-0.8043233941855025, 11.01629429884193]], [[0.1699731103551746, 12.227809261928217], - [-0.6146263106001378, 9.893801873973892]], + [-0.6146263106001378, 9.893801873973892]], [[1.691753328795276, 10.797627196240155], - [-0.647341237129921, 9.626899489691816]]]) + [-0.647341237129921, 9.626899489691816]]]) np.testing.assert_allclose(actual, desired, atol=1e-10, rtol=1e-10) @@ -780,8 +812,7 @@ def test_randomdist_noncentral_chisquare(randomdist): def test_randomdist_noncentral_f(randomdist): rnd.seed(randomdist.seed, brng=randomdist.brng) - actual = rnd.noncentral_f(dfnum=5, dfden=2, nonc=1, - size=(3, 2)) + actual = rnd.noncentral_f(dfnum=5, dfden=2, nonc=1, size=(3, 2)) desired = np.array([[0.2216297348371284, 0.7632696724492449], [98.67664232828238, 0.9500319825372799], [0.3489618249246971, 1.5035633972571092]]) @@ -797,14 +828,18 @@ def test_randomdist_normal(randomdist): np.testing.assert_allclose(actual, desired, atol=1e-7, rtol=1e-10) rnd.seed(randomdist.seed, brng=randomdist.brng) - actual = rnd.normal(loc=.123456789, scale=2.0, size=(3, 2), method="BoxMuller") + actual = rnd.normal( + loc=.123456789, scale=2.0, size=(3, 2), method="BoxMuller" + ) desired = np.array([[0.16673479781277187, -3.4809986872165952], [-0.05193761082535492, 3.249201213154922], [-0.11915582299214138, 3.555636100927892]]) np.testing.assert_allclose(actual, desired, atol=1e-8, rtol=1e-8) rnd.seed(randomdist.seed, brng=randomdist.brng) - actual = rnd.normal(loc=.123456789, scale=2.0, size=(3, 2), method="BoxMuller2") + actual = rnd.normal( + loc=.123456789, scale=2.0, size=(3, 2), method="BoxMuller2" + ) desired = np.array([[0.16673479781277187, 0.48153966449249175], [-3.4809986872165952, -0.8101190082826486], [-0.051937610825354905, 2.4088402362484342]]) @@ -816,8 +851,8 @@ def test_randomdist_pareto(randomdist): actual = rnd.pareto(a=.123456789, size=(3, 2)) desired = np.array( [[0.14079174875385214, 82372044085468.92], - [1247881.6368437486, 15.086855668610944], - [203.2638558933401, 0.10445383654349749]]) + [1247881.6368437486, 15.086855668610944], + [203.2638558933401, 0.10445383654349749]]) # For some reason on 32-bit x86 Ubuntu 12.10 the [1, 0] entry in this # matrix differs by 24 nulps. Discussion: # http://mail.scipy.org/pipermail/numpy-discussion/2012-September/063801.html @@ -920,8 +955,7 @@ def test_randomdist_standard_t(randomdist): def test_randomdist_triangular(randomdist): rnd.seed(randomdist.seed, brng=randomdist.brng) - actual = rnd.triangular(left=5.12, mode=10.23, right=20.34, - size=(3, 2)) + actual = rnd.triangular(left=5.12, mode=10.23, right=20.34, size=(3, 2)) desired = np.array([[18.764540652669638, 6.340166306695037], [8.827752689522429, 13.65605077739865], [11.732872979633328, 18.970392754850423]]) @@ -944,8 +978,8 @@ def test_uniform_range_bounds(): func = rnd.uniform np.testing.assert_raises(OverflowError, func, -np.inf, 0) np.testing.assert_raises(OverflowError, func, 0, np.inf) - # this should not throw any error, since rng can be sampled as fmin*u + fmax*(1-u) - # for 0