Commit ffe7de4e authored by Dahua Lin's avatar Dahua Lin

use new python script to generate reference values for continuous distributions

parent 4285a3e5
......@@ -40,8 +40,8 @@ function kurtosis(d::Weibull)
end
function entropy(d::Weibull)
λ, k = d.scale, d.shape
return ((k - 1.0) / k) * -digamma(1.0) + log(λ / k) + 1.0
k = d.shape
return 0.5772156649015328606 * (1.0 - 1.0 / k) + log(d.scale / k) + 1.0
end
## Functions
......
......@@ -87,10 +87,13 @@ for distr in [
Frechet(120.0, 1.0),
Frechet(0.5, 2.0),
Frechet(3.0, 2.0),
InverseGaussian(1.0, 1.0),
InverseGaussian(2.0, 7.0),
Levy(0.0, 1.0),
Levy(2.0, 8.0),
Levy(3.0, 3.0),
LogNormal(0.0, 1.0),
LogNormal(0.0, 2.0),
LogNormal(3.0, 0.5),
LogNormal(3.0, 1.0),
LogNormal(3.0, 2.0),
......
This diff is collapsed.
# Generate reference values based on scipy.stats
from numpy import sqrt, nan, inf
from scipy.stats import *
lst = [
(arcsine(), "Arcsine()"),
(beta(2.0, 2.0), "Beta(2.0, 2.0)"),
(beta(3.0, 4.0), "Beta(3.0, 4.0)"),
(beta(17.0, 13.0), "Beta(17.0, 13.0)"),
(betaprime(3.0, 3.0), "BetaPrime(3.0, 3.0)"),
(betaprime(3.0, 5.0), "BetaPrime(3.0, 5.0)"),
(betaprime(5.0, 3.0), "BetaPrime(5.0, 3.0)"),
(cauchy(0.0, 1.0), "Cauchy(0.0, 1.0)"),
(cauchy(10.0, 1.0), "Cauchy(10.0, 1.0)"),
(cauchy(2.0, 10.0), "Cauchy(2.0, 10.0)"),
(chi(1.0), "Chi(1)"),
(chi(2.0), "Chi(2)"),
(chi(3.0), "Chi(3)"),
(chi(12.0), "Chi(12)"),
(chi2(1.0), "Chisq(1)"),
(chi2(8.0), "Chisq(8)"),
(chi2(20.0), "Chisq(20)"),
(erlang(1, scale=1.0), "Erlang(1, 1.0)"),
(erlang(3, scale=1.0), "Erlang(3, 1.0)"),
(erlang(5, scale=2.0), "Erlang(5, 2.0)"),
(expon(scale=1.0), "Exponential(1.0)"),
(expon(scale=6.5), "Exponential(6.5)"),
(gamma(1.0, scale=1.0), "Gamma(1.0, 1.0)"),
(gamma(3.0, scale=1.0), "Gamma(3.0, 1.0)"),
(gamma(3.0, scale=2.0), "Gamma(3.0, 2.0)"),
(gumbel_r(3.0, 5.0), "Gumbel(3.0, 5.0)"),
(gumbel_r(5.0, 3.0), "Gumbel(5.0, 3.0)"),
(invgamma(1.0, scale=1.0), "InverseGamma(1.0, 1.0)"),
(invgamma(1.0, scale=2.0), "InverseGamma(1.0, 2.0)"),
(invgamma(2.0, scale=1.0), "InverseGamma(2.0, 1.0)"),
(invgamma(2.0, scale=3.0), "InverseGamma(2.0, 3.0)"),
(invgauss(1.0), "InverseGaussian(1.0, 1.0)"),
(laplace(0.0, 1.0), "Laplace(0.0, 1.0)"),
(laplace(5.0, 1.0), "Laplace(5.0, 1.0)"),
(laplace(5.0, 1.5), "Laplace(5.0, 1.5)"),
(logistic(0.0, 1.0), "Logistic(0.0, 1.0)"),
(logistic(5.0, 1.0), "Logistic(5.0, 1.0)"),
(logistic(5.0, 1.5), "Logistic(5.0, 1.5)"),
(lognorm(1.0), "LogNormal(0.0, 1.0)"),
(lognorm(2.0), "LogNormal(0.0, 2.0)"),
(norm(0.0, 1.0), "Normal(0.0, 1.0)"),
(norm(-3.0, 2.0), "Normal(-3.0, 2.0)"),
(norm(1.0, 10.0), "Normal(1.0, 10.0)"),
(norm(0.0, 1.0), "NormalCanon(0.0, 1.0)"),
(norm(-0.4, sqrt(0.4)), "NormalCanon(-1.0, 2.5)"),
(norm(2.5, sqrt(1.25)), "NormalCanon(2.0, 0.8)"),
(pareto(1.0, scale=1.0), "Pareto(1.0, 1.0)"),
(pareto(2.0, scale=1.0), "Pareto(2.0, 1.0)"),
(pareto(3.0, scale=2.0), "Pareto(3.0, 2.0)"),
(rayleigh(scale=1.0), "Rayleigh(1.0)"),
(rayleigh(scale=3.0), "Rayleigh(3.0)"),
(rayleigh(scale=8.0), "Rayleigh(8.0)"),
(t(1.0), "TDist(1.0)"),
(t(5.0), "TDist(5.0)"),
(t(28.0), "TDist(28.0)"),
(uniform(0.0, 1.0), "Uniform(0.0, 1.0)"),
(uniform(3.0, 14.0), "Uniform(3.0, 17.0)"),
(uniform(3.0, 0.1), "Uniform(3.0, 3.1)"),
(weibull_min(0.5, scale=1.0), "Weibull(0.5, 1.0)"),
(weibull_min(1.0, scale=1.0), "Weibull(1.0, 1.0)"),
(weibull_min(5.0, scale=1.0), "Weibull(5.0, 1.0)"),
(weibull_min(20.0, scale=1.0), "Weibull(20.0, 1.0)"),
(weibull_min(1.0, scale=2.0), "Weibull(1.0, 2.0)"),
(weibull_min(5.0, scale=2.0), "Weibull(5.0, 2.0)")
]
print "distr, mean, var, entropy, x25, x50, x75, p25, p50, p75"
for (d, e) in lst:
m = d.mean()
v = d.var()
ent = d.entropy()
x25 = d.ppf(0.25)
x50 = d.ppf(0.50)
x75 = d.ppf(0.75)
lp25 = d.logpdf(x25)
lp50 = d.logpdf(x50)
lp75 = d.logpdf(x75)
# workaround inconsistency of definitions
if e.startswith("Geometric("):
x25 -= 1
x50 -= 1
x75 -= 1
m -= 1
elif e == "TDist(1.0)":
m = nan
print '"%s", %.16e, %.16e, %.16e, %.16e, %.16e, %.16e, %.16e, %.16e, %.16e' % (e, m, v, ent, x25, x50, x75, lp25, lp50, lp75)
Arcsine()
Beta(2.0, 2.0)
Beta(3.0, 4.0)
Beta(17.0, 13.0)
BetaPrime(3.0, 3.0)
BetaPrime(3.0, 5.0)
BetaPrime(5.0, 3.0)
Cauchy(0.0, 1.0)
Cauchy(10.0, 1.0)
Cauchy(2.0, 10.0)
Chi(1)
Chi(2)
Chi(3)
Chi(12)
Chisq(1)
Chisq(8)
Chisq(20)
Erlang(1, 1.0)
Erlang(3, 1.0)
Erlang(5, 2.0)
Exponential(1.0)
Exponential(6.5)
Gamma(1.0, 1.0)
Gamma(3.0, 1.0)
Gamma(3.0, 2.0)
Gumbel(3.0, 5.0)
Gumbel(5.0, 3.0)
InverseGamma(1.0, 1.0)
InverseGamma(1.0, 2.0)
InverseGamma(2.0, 1.0)
InverseGamma(2.0, 3.0)
Laplace(0.0, 1.0)
Laplace(5.0, 1.0)
Laplace(5.0, 1.5)
Logistic(0.0, 1.0)
Logistic(5.0, 1.0)
Logistic(5.0, 1.5)
Normal(0.0, 1.0)
Normal(-3.0, 2.0)
Normal(1.0, 10.0)
NormalCanon(0.0, 1.0)
NormalCanon(-1.0, 2.5)
NormalCanon(2.0, 0.8)
Pareto(1.0, 1.0)
Pareto(2.0, 1.0)
Pareto(3.0, 2.0)
Rayleigh(1.0)
Rayleigh(3.0)
Rayleigh(8.0)
TDist(1.2)
TDist(5.0)
TDist(28.0)
Uniform(0.0, 1.0)
Uniform(3.0, 17.0)
Uniform(3.0, 3.1)
Weibull(0.5, 1.0)
Weibull(1.0, 1.0)
Weibull(5.0, 1.0)
Weibull(20.0, 1.0)
Weibull(1.0, 2.0)
Weibull(5.0, 2.0)
# Generate reference values based on scipy.stats
from scipy.stats import *
lst = [
(bernoulli(0.5), "Bernoulli(0.5)"),
(bernoulli(0.9), "Bernoulli(0.9)"),
(bernoulli(0.1), "Bernoulli(0.1)"),
(binom(1, 0.5), "Binomial(1, 0.5)"),
(binom(100, 0.1), "Binomial(100, 0.1)"),
(binom(100, 0.9), "Binomial(100, 0.9)"),
(randint(0, 4+1), "DiscreteUniform(0, 4)"),
(randint(2, 8+1), "DiscreteUniform(2, 8)"),
(geom(0.1), "Geometric(0.1)"),
(geom(0.5), "Geometric(0.5)"),
(geom(0.9), "Geometric(0.9)"),
(hypergeom(2+2, 2, 2), "Hypergeometric(2, 2, 2)"),
(hypergeom(3+2, 3, 2), "Hypergeometric(3, 2, 2)"),
(hypergeom(4+5, 4, 6), "Hypergeometric(4, 5, 6)"),
(hypergeom(60+80, 60, 100), "Hypergeometric(60, 80, 100)"),
(nbinom(1, 0.5), "NegativeBinomial(1, 0.5)"),
(nbinom(5, 0.6), "NegativeBinomial(5, 0.6)"),
(poisson(0.5), "Poisson(0.5)"),
(poisson(2.0), "Poisson(2.0)"),
(poisson(10.0), "Poisson(10.0)"),
(poisson(80.0), "Poisson(80.0)")
]
print "distr, mean, var, entropy, x25, x50, x75, p25, p50, p75"
for (d, e) in lst:
m = d.mean()
v = d.var()
ent = d.entropy()
x25 = d.ppf(0.25)
x50 = d.ppf(0.50)
x75 = d.ppf(0.75)
lp25 = d.logpmf(x25)
lp50 = d.logpmf(x50)
lp75 = d.logpmf(x75)
# workaround inconsistency of definitions
if e.startswith("Geometric("):
x25 -= 1
x50 -= 1
x75 -= 1
m -= 1
print '"%s", %.16e, %.16e, %.16e, %d, %d, %d, %.16e, %.16e, %.16e' % (e, m, v, ent, x25, x50, x75, lp25, lp50, lp75)
# A Python script to prepare reference values for distribution testing
import re
from numpy import sqrt, nan, inf
from scipy.stats import *
def parse_distr(s):
......@@ -9,8 +10,12 @@ def parse_distr(s):
l = s.index("(")
r = s.index(")")
name = s[:l]
terms = re.split(r",\s*", s[l+1:r])
args = tuple(float(t) for t in terms)
ts = s[l+1:r].strip()
if len(ts) > 0:
terms = re.split(r",\s*", s[l+1:r])
args = tuple(float(t) for t in terms)
else:
args = ()
return name, args
......@@ -32,42 +37,123 @@ def read_distr_list(filename):
def to_scipy_dist(name, args):
"""Convert from Julia distribution to scipy.stats distribution"""
if name == "Bernoulli":
if name == "Arcsine":
assert len(args) == 0
return arcsine()
elif name == "Bernoulli":
assert len(args) == 1
return bernoulli(args[0])
elif name == "Beta":
assert len(args) == 2
return beta(args[0], args[1])
elif name == "BetaPrime":
assert len(args) == 2
return betaprime(args[0], args[1])
elif name == "Binomial":
assert len(args) == 2
return binom(args[0], args[1])
elif name == "Cauchy":
assert len(args) == 2
return cauchy(args[0], args[1])
elif name == "Chi":
assert len(args) == 1
return chi(args[0])
elif name == "Chisq":
assert len(args) == 1
return chi2(args[0])
elif name == "DiscreteUniform":
assert len(args) == 2
return randint(args[0], args[1] + 1)
elif name == "Erlang":
assert len(args) == 2
return erlang(args[0], scale=args[1])
elif name == "Exponential":
assert len(args) == 1
return expon(scale=args[0])
elif name == "Gamma":
assert len(args) == 2
return gamma(args[0], scale=args[1])
elif name == "Geometric":
assert len(args) == 1
return geom(args[0])
elif name == "Gumbel":
assert len(args) == 2
return gumbel_r(args[0], args[1])
elif name == "Hypergeometric":
assert len(args) == 3
return hypergeom(args[0] + args[1], args[0], args[2])
elif name == "InverseGamma":
assert len(args) == 2
return invgamma(args[0], scale=args[1])
elif name == "Laplace":
assert len(args) == 2
return laplace(args[0], args[1])
elif name == "Logistic":
assert len(args) == 2
return logistic(args[0], args[1])
elif name == "NegativeBinomial":
assert len(args) == 2
return nbinom(args[0], args[1])
elif name == "Normal":
assert len(args) == 2
return norm(args[0], args[1])
elif name == "NormalCanon":
assert len(args) == 2
return norm(args[0] / args[1], sqrt(1.0 / args[1]))
elif name == "Pareto":
assert len(args) == 2
return pareto(args[0], scale=args[1])
elif name == "Poisson":
assert len(args) == 1
return poisson(args[0])
elif name == "Rayleigh":
assert len(args) == 1
return rayleigh(scale=args[0])
elif name == "TDist":
assert len(args) == 1
return t(args[0])
elif name == "Uniform":
assert len(args) == 2
return uniform(args[0], args[1] - args[0])
elif name == "Weibull":
assert len(args) == 2
return weibull_min(args[0], scale=args[1])
else:
raise ValueError("Unknown distribution name %s" % name)
def do_main(filename):
def do_main(title):
"""main skeleton"""
filename = title + "_ref"
lst = read_distr_list(filename + ".txt")
with open(filename + ".csv", "wt") as fout:
......@@ -83,9 +169,14 @@ def do_main(filename):
x50 = d.ppf(0.50)
x75 = d.ppf(0.75)
lp25 = d.logpmf(x25)
lp50 = d.logpmf(x50)
lp75 = d.logpmf(x75)
if title == "discrete":
lp25 = d.logpmf(x25)
lp50 = d.logpmf(x50)
lp75 = d.logpmf(x75)
else:
lp25 = d.logpdf(x25)
lp50 = d.logpdf(x50)
lp75 = d.logpdf(x75)
# workaround inconsistency of definitions
if name == "Geometric":
......@@ -94,10 +185,16 @@ def do_main(filename):
x75 -= 1
m -= 1
print >>fout, '"%s", %.16e, %.16e, %.16e, %d, %d, %d, %.16e, %.16e, %.16e' % (
ex, m, v, ent, x25, x50, x75, lp25, lp50, lp75)
if title == "discrete":
print >>fout, '"%s", %.16e, %.16e, %.16e, %d, %d, %d, %.16e, %.16e, %.16e' % (
ex, m, v, ent, x25, x50, x75, lp25, lp50, lp75)
else:
print >>fout, '"%s", %.16e, %.16e, %.16e, %.16e, %.16e, %.16e, %.16e, %.16e, %.16e' % (
ex, m, v, ent, x25, x50, x75, lp25, lp50, lp75)
if __name__ == "__main__":
do_main("discrete_ref")
do_main("discrete")
do_main("continuous")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment