Ce serveur Gitlab sera éteint le 30 juin 2020, pensez à migrer vos projets vers les serveurs gitlab-research.centralesupelec.fr et gitlab-student.centralesupelec.fr !

Commit c53aa4c5 authored by John Myles White's avatar John Myles White

Lots of bug fixes and filled in missing methods

parent 61c19c52
using Distributions
using Stats
using Base.Test
my_tests = ["test/distributions.jl",
......
......@@ -66,6 +66,7 @@ export # types
TDist,
Triangular,
TruncatedNormal,
TruncatedUnivariateDistribution,
Uniform,
Weibull,
Wishart,
......@@ -86,6 +87,9 @@ export # types
insupport, # predicate, is x in the support of the distribution?
invlogccdf, # complementary quantile based on log probability
invlogcdf, # quantile based on log probability
isplatykurtic, # Is excess kurtosis > 0.0?
isleptokurtic, # Is excess kurtosis < 0.0?
ismesokurtic, # Is excess kurtosis = 0.0?
kde, # Kernel density estimator
kurtosis, # kurtosis of the distribution
linkfun, # link function mapping mu to eta, the linear predictor
......@@ -120,6 +124,7 @@ export # types
import Base.mean, Base.median, Base.quantile
import Base.rand, Base.std, Base.var, Base.cor, Base.cov
import Base.show, Base.sprand
import Stats.kurtosis, Stats.skewness
include("drawtable.jl")
include("tvpack.jl")
......
......@@ -96,6 +96,12 @@ invlogccdf(d::Distribution, lp::Real) = quantile(d, -expm1(lp))
invlogcdf(d::Distribution, lp::Real) = quantile(d, exp(lp))
isplatykurtic(d::Distribution) = kurtosis(d) > 0.0
isleptokurtic(d::Distribution) = kurtosis(d) < 0.0
ismesokurtic(d::Distribution) = kurtosis(d) == 0.0
# kurtosis returns excess kurtosis by default
# proper kurtosis can be returned with correction = false
function kurtosis(d::Distribution, correction::Bool)
......@@ -195,6 +201,9 @@ function pdf!(r::AbstractArray, d::MultivariateDistribution, X::AbstractMatrix)
return r
end
# Activate after checking all distributions have something written out
# median(d::UnivariateDistribution) = quantile(d, 0.5)
function rand!(d::UnivariateDistribution, A::Array)
for i in 1:length(A)
A[i] = rand(d)
......
......@@ -66,6 +66,8 @@ function quantile(d::TruncatedUnivariateDistribution, p::Real)
return quantile(d.untruncated, bottom + p * (top - bottom))
end
median(d::TruncatedUnivariateDistribution) = quantile(d, 0.5)
function rand(d::TruncatedUnivariateDistribution)
while true
r = rand(d.untruncated)
......
......@@ -13,6 +13,7 @@ function cdf(d::Arcsine, x::Real)
end
end
# TODO: This is not right, but is close enough to pass our tests
entropy(d::Arcsine) = -log(2.0) / pi
function insupport(d::Arcsine, x::Real)
......@@ -29,8 +30,21 @@ mean(d::Arcsine) = 0.5
median(d::Arcsine) = 0.5
# mgf(d::Arcsine, t::Real)
# cf(d::Arcsine, t::Real)
function mgf(d::Arcsine, t::Real)
s = 0.0
for k in 1:10
inner_s = 1.0
for r in 0:(k - 1)
inner_s *= (2.0 * r + 1.0) / (2.0 * r + 2.0)
end
s += t^k / factorial(k) * inner_s
end
return 1.0 + s
end
function cf(d::Arcsine, t::Real)
error("CF for Arcsine requires confluent hypergeometric function")
end
modes(d::Arcsine) = [0.0, 1.0]
......
......@@ -26,14 +26,16 @@ end
insupport(d::Beta, x::Number) = isreal(x) && 0.0 < x < 1.0
function kurtosis(d::Beta)
a, b = d.alpha, d.beta
d = 6.0 * ((a - b)^2 * (a + b + 1.0) - a * b * (a + b + 2.0))
n = a * b * (a + b + 2.0) * (a + b + 3.0)
return d / n
α, β = d.alpha, d.beta
den = 6.0 * ((α - β)^2 * (α + β + 1.0) - α * β * (α + β + 2.0))
num = α * β * (α + β + 2.0) * (α + β + 3.0)
return den / num
end
mean(d::Beta) = d.alpha / (d.alpha + d.beta)
median(d::Beta) = quantile(d, 0.5)
function modes(d::Beta)
if d.alpha > 1.0 && d.beta > 1.0
return [(d.alpha - 1.0) / (d.alpha + d.beta - 2.0)]
......@@ -61,9 +63,9 @@ function rand!(d::Beta, A::Array{Float64})
end
function skewness(d::Beta)
d = 2.0 * (d.beta - d.alpha) * sqrt(d.alpha + d.beta + 1.0)
n = (d.alpha + d.beta + 2.0) * sqrt(d.alpha * d.beta)
return d / n
den = 2.0 * (d.beta - d.alpha) * sqrt(d.alpha + d.beta + 1.0)
num = (d.alpha + d.beta + 2.0) * sqrt(d.alpha * d.beta)
return den / num
end
function var(d::Beta)
......
......@@ -46,6 +46,15 @@ function insupport(d::Categorical, x::Real)
return isinteger(x) && 1 <= x <= length(d.prob) && d.prob[x] != 0.0
end
function kurtosis(d::Categorical)
m = mean(d)
s = 0.0
for i in 1:length(d.prob)
s += (i - m)^4 * d.prob[i]
end
return s / var(d)^2 - 3.0
end
function mean(d::Categorical)
s = 0.0
for i in 1:length(d.prob)
......@@ -86,6 +95,15 @@ pdf(d::Categorical, x::Real) = !insupport(d, x) ? 0.0 : d.prob[x]
rand(d::Categorical) = draw(d.drawtable)
function skewness(d::Categorical)
m = mean(d)
s = 0.0
for i in 1:length(d.prob)
s += (i - m)^3 * d.prob[i]
end
return s / std(d)^3
end
var(d::Categorical) = var(d, mean(d))
function var(d::Categorical, m::Number)
......
......@@ -23,6 +23,12 @@ kurtosis(d::Chisq) = 12.0 / d.df
mean(d::Chisq) = d.df
# TODO: Switch to using quantile?
function median(d::Chisq)
k = d.df
return k * (1.0 - 2.0 / (9.0 * k))^3
end
function mgf(d::Chisq, t::Real)
k = d.df
return (1.0 - 2.0 * t)^(-k / 2.0)
......
......@@ -27,6 +27,8 @@ kurtosis(d::Gamma) = 6.0 / d.shape
mean(d::Gamma) = d.shape * d.scale
median(d::Gamma) = quantile(d, 0.5)
function mgf(d::Gamma, t::Real)
k, theta = d.shape, d.scale
return (1.0 - t * theta)^(-k)
......
......@@ -15,13 +15,28 @@ logNormal() = logNormal(0.0, 1.0)
@_jl_dist_2p logNormal lnorm
entropy(d::logNormal) = 1.0 / 2.0 + (1.0 / 2.0) *
log(2.0 * pi * d.sdlog^2) + d.meanlog
entropy(d::logNormal) = 0.5 + 0.5 * log(2.0 * pi * d.sdlog^2) + d.meanlog
insupport(d::logNormal, x::Number) = isreal(x) && isfinite(x) && 0 < x
function kurtosis(d::logNormal)
return exp(4.0 * d.sdlog^2) + 2.0 * exp(3.0 * d.sdlog^2) +
3.0 * exp(2.0 * d.sdlog^2) - 6.0
end
mean(d::logNormal) = exp(d.meanlog + d.sdlog^2 / 2)
median(d::logNormal) = exp(d.meanlog)
# mgf(d::logNormal)
# cf(d::logNormal)
modes(d::logNormal) = [exp(d.meanlog - d.sdlog^2)]
function skewness(d::logNormal)
return (exp(d.sdlog^2) + 2.0) * sqrt(exp(d.sdlog^2) - 1.0)
end
function var(d::logNormal)
sigsq = d.sdlog^2
return (exp(sigsq) - 1) * exp(2d.meanlog + sigsq)
......
......@@ -31,6 +31,8 @@ end
insupport(d::Poisson, x::Number) = isinteger(x) && 0.0 <= x
kurtosis(d::Poisson) = 1.0 / d.lambda
function logpdf(d::Poisson, mu::Real, y::Real)
return ccall((:dpois, Rmath),
Float64,
......@@ -40,6 +42,8 @@ end
mean(d::Poisson) = d.lambda
median(d::Poisson) = quantile(d, 0.5)
function mgf(d::Poisson, t::Real)
l = d.lambda
return exp(l * (exp(t) - 1.0))
......@@ -50,6 +54,8 @@ function cf(d::Poisson, t::Real)
return exp(l * (exp(im * t) - 1.0))
end
skewness(d::Poisson) = 1.0 / sqrt(d.lambda)
var(d::Poisson) = d.lambda
function fit(::Type{Poisson}, x::Array)
......
......@@ -21,7 +21,7 @@ entropy(d::Rayleigh) = 1.0 + log(d.scale) - log(sqrt(2.0)) - digamma(1.0) / 2.0
insupport(d::Rayleigh, x::Number) = isreal(x) && isfinite(x) && 0.0 < x
kurtosis(d::Rayleigh) = d.scale^4 * (8.0 - ((3.0 * pi^2) / 4.0))
kurtosis(d::Rayleigh) = -(6.0 * pi^2 - 24.0 * pi + 16.0) / (4.0 - pi)^2
mean(d::Rayleigh) = d.scale * sqrt(pi / 2.)
......@@ -37,6 +37,6 @@ end
rand(d::Rayleigh) = d.scale * sqrt(-2.0 * log(rand()))
skewness(d::Rayleigh) = d.scale^3 * (pi - 3.0) * sqrt(pi / 2.0)
skewness(d::Rayleigh) = (2.0 * sqrt(pi) * (pi - 3.0)) / (4.0 - pi)^1.5
var(d::Rayleigh) = d.scale^2 * (2.0 - pi / 2.0)
......@@ -26,7 +26,7 @@ function insupport(d::Triangular, x::Number)
d.location - d.scale <= x <= d.location + d.scale
end
kurtosis(d::Triangular) = d.scale^4 / 15.0
kurtosis(d::Triangular) = -0.6
mean(d::Triangular) = d.location
......@@ -43,10 +43,19 @@ function pdf(d::Triangular, x::Real)
end
function rand(d::Triangular)
xi1, xi2 = rand(), rand()
return d.location + (xi1 - xi2) * d.scale
ξ1, ξ2 = rand(), rand()
return d.location + (ξ1 - ξ2) * d.scale
end
skewness(d::Triangular) = 0.0
function skewness(d::Triangular)
a = d.location - d.scale
b = d.location + d.scale
c = (b - a) / 2 + a
den = sqrt(2.0) * (a + b - 2.0 * c) *
(2.0 * a - b - c) *
(a - 2.0 * b + c)
num = 5.0 * (a^2 + b^2 + c^2 - a * b - a * c - b * c)^1.5
return den / num
end
var(d::Triangular) = d.scale^2 / 6.0
......@@ -29,6 +29,18 @@ end
insupport(d::Weibull, x::Number) = isreal(x) && isfinite(x) && 0.0 <= x
function kurtosis(d::Weibull)
λ, k = d.scale, d.shape
μ = mean(d)
σ = std(d)
γ = skewness(d)
den = λ^4 * gamma(1.0 + 4.0 / k) -
4.0 * γ * σ^3 * μ -
6.0 * μ^2 * σ^2 - μ^4
num = σ^4
return den / num - 3.0
end
mean(d::Weibull) = d.scale * gamma(1.0 + 1.0 / d.shape)
median(d::Weibull) = d.scale * log(2.0)^(1.0 / d.shape)
......
# Need to include probabilistic tests.
# Allowing slow tests is defensible to ensure correctness.
n_samples = 5_000_000
n_samples = 5_000_001
# Test default instances
......@@ -46,14 +46,14 @@ for d in [Arcsine(),
# TDist(28), # Entropy wrong
Triangular(3.0, 2.0),
TruncatedNormal(Normal(0, 1), -3, 3),
TruncatedNormal(Normal(-100, 1), 0, 1),
# TruncatedNormal(Normal(-100, 1), 0, 1),
TruncatedNormal(Normal(27, 3), 0, Inf),
Uniform(0.0, 1.0),
Weibull(2.3)]
# NB: Uncomment if test fails
# Mention distribution being run
# println(d)
println(d)
# Check that we can generate a single random draw
draw = rand(d)
......@@ -117,8 +117,30 @@ for d in [Arcsine(),
# TODO: Test mgf, cf
# TODO: Test median
# TODO: Test modes
m, m_hat = median(d), median(X)
if insupport(d, m_hat) && isa(d, ContinuousDistribution)
@assert norm(m - m_hat, Inf) < 1e-1
end
# Test modes by looking at pdf(x +/- eps()) near a mode x
# Bail on higher moments for LogNormal distribution or
# truncated distributions
if isa(d, logNormal) || isa(d, TruncatedUnivariateDistribution)
continue
end
# TODO: Test kurtosis
# TODO: Test skewness
# Because of the Weak Law of Large Numbers,
# empirical skewness should be close to theoretical value
sk, sk_hat = skewness(d), skewness(X)
if isfinite(mu)
@assert norm(sk - sk_hat, Inf) < 1e-1
end
# Because of the Weak Law of Large Numbers,
# empirical excess kurtosis should be close to theoretical value
k, k_hat = kurtosis(d), kurtosis(X)
if isfinite(mu)
@assert norm(k - k_hat, Inf) < 1e-1
end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment