### Use a new Python script to generate reference values for continuous distributions

parent 4285a3e5
 ... ... @@ -40,8 +40,8 @@ function kurtosis(d::Weibull) end function entropy(d::Weibull) λ, k = d.scale, d.shape return ((k - 1.0) / k) * -digamma(1.0) + log(λ / k) + 1.0 k = d.shape return 0.5772156649015328606 * (1.0 - 1.0 / k) + log(d.scale / k) + 1.0 end ## Functions ... ...
 ... ... @@ -87,10 +87,13 @@ for distr in [ Frechet(120.0, 1.0), Frechet(0.5, 2.0), Frechet(3.0, 2.0), InverseGaussian(1.0, 1.0), InverseGaussian(2.0, 7.0), Levy(0.0, 1.0), Levy(2.0, 8.0), Levy(3.0, 3.0), LogNormal(0.0, 1.0), LogNormal(0.0, 2.0), LogNormal(3.0, 0.5), LogNormal(3.0, 1.0), LogNormal(3.0, 2.0), ... ...
This diff is collapsed.
 # Generate reference values based on scipy.stats from numpy import sqrt, nan, inf from scipy.stats import * lst = [ (arcsine(), "Arcsine()"), (beta(2.0, 2.0), "Beta(2.0, 2.0)"), (beta(3.0, 4.0), "Beta(3.0, 4.0)"), (beta(17.0, 13.0), "Beta(17.0, 13.0)"), (betaprime(3.0, 3.0), "BetaPrime(3.0, 3.0)"), (betaprime(3.0, 5.0), "BetaPrime(3.0, 5.0)"), (betaprime(5.0, 3.0), "BetaPrime(5.0, 3.0)"), (cauchy(0.0, 1.0), "Cauchy(0.0, 1.0)"), (cauchy(10.0, 1.0), "Cauchy(10.0, 1.0)"), (cauchy(2.0, 10.0), "Cauchy(2.0, 10.0)"), (chi(1.0), "Chi(1)"), (chi(2.0), "Chi(2)"), (chi(3.0), "Chi(3)"), (chi(12.0), "Chi(12)"), (chi2(1.0), "Chisq(1)"), (chi2(8.0), "Chisq(8)"), (chi2(20.0), "Chisq(20)"), (erlang(1, scale=1.0), "Erlang(1, 1.0)"), (erlang(3, scale=1.0), "Erlang(3, 1.0)"), (erlang(5, scale=2.0), "Erlang(5, 2.0)"), (expon(scale=1.0), "Exponential(1.0)"), (expon(scale=6.5), "Exponential(6.5)"), (gamma(1.0, scale=1.0), "Gamma(1.0, 1.0)"), (gamma(3.0, scale=1.0), "Gamma(3.0, 1.0)"), (gamma(3.0, scale=2.0), "Gamma(3.0, 2.0)"), (gumbel_r(3.0, 5.0), "Gumbel(3.0, 5.0)"), (gumbel_r(5.0, 3.0), "Gumbel(5.0, 3.0)"), (invgamma(1.0, scale=1.0), "InverseGamma(1.0, 1.0)"), (invgamma(1.0, scale=2.0), "InverseGamma(1.0, 2.0)"), (invgamma(2.0, scale=1.0), "InverseGamma(2.0, 1.0)"), (invgamma(2.0, scale=3.0), "InverseGamma(2.0, 3.0)"), (invgauss(1.0), "InverseGaussian(1.0, 1.0)"), (laplace(0.0, 1.0), "Laplace(0.0, 1.0)"), (laplace(5.0, 1.0), "Laplace(5.0, 1.0)"), (laplace(5.0, 1.5), "Laplace(5.0, 1.5)"), (logistic(0.0, 1.0), "Logistic(0.0, 1.0)"), (logistic(5.0, 1.0), "Logistic(5.0, 1.0)"), (logistic(5.0, 1.5), "Logistic(5.0, 1.5)"), (lognorm(1.0), "LogNormal(0.0, 1.0)"), (lognorm(2.0), "LogNormal(0.0, 2.0)"), (norm(0.0, 1.0), "Normal(0.0, 1.0)"), (norm(-3.0, 2.0), "Normal(-3.0, 2.0)"), (norm(1.0, 10.0), "Normal(1.0, 10.0)"), (norm(0.0, 1.0), "NormalCanon(0.0, 1.0)"), (norm(-0.4, sqrt(0.4)), "NormalCanon(-1.0, 2.5)"), (norm(2.5, sqrt(1.25)), "NormalCanon(2.0, 0.8)"), (pareto(1.0, scale=1.0), 
"Pareto(1.0, 1.0)"), (pareto(2.0, scale=1.0), "Pareto(2.0, 1.0)"), (pareto(3.0, scale=2.0), "Pareto(3.0, 2.0)"), (rayleigh(scale=1.0), "Rayleigh(1.0)"), (rayleigh(scale=3.0), "Rayleigh(3.0)"), (rayleigh(scale=8.0), "Rayleigh(8.0)"), (t(1.0), "TDist(1.0)"), (t(5.0), "TDist(5.0)"), (t(28.0), "TDist(28.0)"), (uniform(0.0, 1.0), "Uniform(0.0, 1.0)"), (uniform(3.0, 14.0), "Uniform(3.0, 17.0)"), (uniform(3.0, 0.1), "Uniform(3.0, 3.1)"), (weibull_min(0.5, scale=1.0), "Weibull(0.5, 1.0)"), (weibull_min(1.0, scale=1.0), "Weibull(1.0, 1.0)"), (weibull_min(5.0, scale=1.0), "Weibull(5.0, 1.0)"), (weibull_min(20.0, scale=1.0), "Weibull(20.0, 1.0)"), (weibull_min(1.0, scale=2.0), "Weibull(1.0, 2.0)"), (weibull_min(5.0, scale=2.0), "Weibull(5.0, 2.0)") ] print "distr, mean, var, entropy, x25, x50, x75, p25, p50, p75" for (d, e) in lst: m = d.mean() v = d.var() ent = d.entropy() x25 = d.ppf(0.25) x50 = d.ppf(0.50) x75 = d.ppf(0.75) lp25 = d.logpdf(x25) lp50 = d.logpdf(x50) lp75 = d.logpdf(x75) # workaround inconsistency of definitions if e.startswith("Geometric("): x25 -= 1 x50 -= 1 x75 -= 1 m -= 1 elif e == "TDist(1.0)": m = nan print '"%s", %.16e, %.16e, %.16e, %.16e, %.16e, %.16e, %.16e, %.16e, %.16e' % (e, m, v, ent, x25, x50, x75, lp25, lp50, lp75)
 Arcsine() Beta(2.0, 2.0) Beta(3.0, 4.0) Beta(17.0, 13.0) BetaPrime(3.0, 3.0) BetaPrime(3.0, 5.0) BetaPrime(5.0, 3.0) Cauchy(0.0, 1.0) Cauchy(10.0, 1.0) Cauchy(2.0, 10.0) Chi(1) Chi(2) Chi(3) Chi(12) Chisq(1) Chisq(8) Chisq(20) Erlang(1, 1.0) Erlang(3, 1.0) Erlang(5, 2.0) Exponential(1.0) Exponential(6.5) Gamma(1.0, 1.0) Gamma(3.0, 1.0) Gamma(3.0, 2.0) Gumbel(3.0, 5.0) Gumbel(5.0, 3.0) InverseGamma(1.0, 1.0) InverseGamma(1.0, 2.0) InverseGamma(2.0, 1.0) InverseGamma(2.0, 3.0) Laplace(0.0, 1.0) Laplace(5.0, 1.0) Laplace(5.0, 1.5) Logistic(0.0, 1.0) Logistic(5.0, 1.0) Logistic(5.0, 1.5) Normal(0.0, 1.0) Normal(-3.0, 2.0) Normal(1.0, 10.0) NormalCanon(0.0, 1.0) NormalCanon(-1.0, 2.5) NormalCanon(2.0, 0.8) Pareto(1.0, 1.0) Pareto(2.0, 1.0) Pareto(3.0, 2.0) Rayleigh(1.0) Rayleigh(3.0) Rayleigh(8.0) TDist(1.2) TDist(5.0) TDist(28.0) Uniform(0.0, 1.0) Uniform(3.0, 17.0) Uniform(3.0, 3.1) Weibull(0.5, 1.0) Weibull(1.0, 1.0) Weibull(5.0, 1.0) Weibull(20.0, 1.0) Weibull(1.0, 2.0) Weibull(5.0, 2.0)
 # Generate reference values based on scipy.stats from scipy.stats import * lst = [ (bernoulli(0.5), "Bernoulli(0.5)"), (bernoulli(0.9), "Bernoulli(0.9)"), (bernoulli(0.1), "Bernoulli(0.1)"), (binom(1, 0.5), "Binomial(1, 0.5)"), (binom(100, 0.1), "Binomial(100, 0.1)"), (binom(100, 0.9), "Binomial(100, 0.9)"), (randint(0, 4+1), "DiscreteUniform(0, 4)"), (randint(2, 8+1), "DiscreteUniform(2, 8)"), (geom(0.1), "Geometric(0.1)"), (geom(0.5), "Geometric(0.5)"), (geom(0.9), "Geometric(0.9)"), (hypergeom(2+2, 2, 2), "Hypergeometric(2, 2, 2)"), (hypergeom(3+2, 3, 2), "Hypergeometric(3, 2, 2)"), (hypergeom(4+5, 4, 6), "Hypergeometric(4, 5, 6)"), (hypergeom(60+80, 60, 100), "Hypergeometric(60, 80, 100)"), (nbinom(1, 0.5), "NegativeBinomial(1, 0.5)"), (nbinom(5, 0.6), "NegativeBinomial(5, 0.6)"), (poisson(0.5), "Poisson(0.5)"), (poisson(2.0), "Poisson(2.0)"), (poisson(10.0), "Poisson(10.0)"), (poisson(80.0), "Poisson(80.0)") ] print "distr, mean, var, entropy, x25, x50, x75, p25, p50, p75" for (d, e) in lst: m = d.mean() v = d.var() ent = d.entropy() x25 = d.ppf(0.25) x50 = d.ppf(0.50) x75 = d.ppf(0.75) lp25 = d.logpmf(x25) lp50 = d.logpmf(x50) lp75 = d.logpmf(x75) # workaround inconsistency of definitions if e.startswith("Geometric("): x25 -= 1 x50 -= 1 x75 -= 1 m -= 1 print '"%s", %.16e, %.16e, %.16e, %d, %d, %d, %.16e, %.16e, %.16e' % (e, m, v, ent, x25, x50, x75, lp25, lp50, lp75)
 # A Python script to prepare reference values for distribution testing import re from numpy import sqrt, nan, inf from scipy.stats import * def parse_distr(s): ... ... @@ -9,8 +10,12 @@ def parse_distr(s): l = s.index("(") r = s.index(")") name = s[:l] terms = re.split(r",\s*", s[l+1:r]) args = tuple(float(t) for t in terms) ts = s[l+1:r].strip() if len(ts) > 0: terms = re.split(r",\s*", s[l+1:r]) args = tuple(float(t) for t in terms) else: args = () return name, args ... ... @@ -32,42 +37,123 @@ def read_distr_list(filename): def to_scipy_dist(name, args): """Convert from Julia distribution to scipy.stats distribution""" if name == "Bernoulli": if name == "Arcsine": assert len(args) == 0 return arcsine() elif name == "Bernoulli": assert len(args) == 1 return bernoulli(args) elif name == "Beta": assert len(args) == 2 return beta(args, args) elif name == "BetaPrime": assert len(args) == 2 return betaprime(args, args) elif name == "Binomial": assert len(args) == 2 return binom(args, args) elif name == "Cauchy": assert len(args) == 2 return cauchy(args, args) elif name == "Chi": assert len(args) == 1 return chi(args) elif name == "Chisq": assert len(args) == 1 return chi2(args) elif name == "DiscreteUniform": assert len(args) == 2 return randint(args, args + 1) elif name == "Erlang": assert len(args) == 2 return erlang(args, scale=args) elif name == "Exponential": assert len(args) == 1 return expon(scale=args) elif name == "Gamma": assert len(args) == 2 return gamma(args, scale=args) elif name == "Geometric": assert len(args) == 1 return geom(args) elif name == "Gumbel": assert len(args) == 2 return gumbel_r(args, args) elif name == "Hypergeometric": assert len(args) == 3 return hypergeom(args + args, args, args) elif name == "InverseGamma": assert len(args) == 2 return invgamma(args, scale=args) elif name == "Laplace": assert len(args) == 2 return laplace(args, args) elif name == "Logistic": assert len(args) == 2 return logistic(args, args) elif name == 
"NegativeBinomial": assert len(args) == 2 return nbinom(args, args) elif name == "Normal": assert len(args) == 2 return norm(args, args) elif name == "NormalCanon": assert len(args) == 2 return norm(args / args, sqrt(1.0 / args)) elif name == "Pareto": assert len(args) == 2 return pareto(args, scale=args) elif name == "Poisson": assert len(args) == 1 return poisson(args) elif name == "Rayleigh": assert len(args) == 1 return rayleigh(scale=args) elif name == "TDist": assert len(args) == 1 return t(args) elif name == "Uniform": assert len(args) == 2 return uniform(args, args - args) elif name == "Weibull": assert len(args) == 2 return weibull_min(args, scale=args) else: raise ValueError("Unknown distribution name %s" % name) def do_main(filename): def do_main(title): """main skeleton""" filename = title + "_ref" lst = read_distr_list(filename + ".txt") with open(filename + ".csv", "wt") as fout: ... ... @@ -83,9 +169,14 @@ def do_main(filename): x50 = d.ppf(0.50) x75 = d.ppf(0.75) lp25 = d.logpmf(x25) lp50 = d.logpmf(x50) lp75 = d.logpmf(x75) if title == "discrete": lp25 = d.logpmf(x25) lp50 = d.logpmf(x50) lp75 = d.logpmf(x75) else: lp25 = d.logpdf(x25) lp50 = d.logpdf(x50) lp75 = d.logpdf(x75) # workaround inconsistency of definitions if name == "Geometric": ... ... @@ -94,10 +185,16 @@ def do_main(filename): x75 -= 1 m -= 1 print >>fout, '"%s", %.16e, %.16e, %.16e, %d, %d, %d, %.16e, %.16e, %.16e' % ( ex, m, v, ent, x25, x50, x75, lp25, lp50, lp75) if title == "discrete": print >>fout, '"%s", %.16e, %.16e, %.16e, %d, %d, %d, %.16e, %.16e, %.16e' % ( ex, m, v, ent, x25, x50, x75, lp25, lp50, lp75) else: print >>fout, '"%s", %.16e, %.16e, %.16e, %.16e, %.16e, %.16e, %.16e, %.16e, %.16e' % ( ex, m, v, ent, x25, x50, x75, lp25, lp50, lp75) if __name__ == "__main__": do_main("discrete_ref") do_main("discrete") do_main("continuous")
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!