Ce serveur Gitlab sera éteint le 30 juin 2020, pensez à migrer vos projets vers les serveurs gitlab-research.centralesupelec.fr et gitlab-student.centralesupelec.fr !

Commit 18736115 authored by Douglas Bates's avatar Douglas Bates

Moved Link types here from the Glm package

parent 9eb41de0
module Distributions
export # types
CauchitLink,
CloglogLink,
Distribution,
DiscreteDistribution,
ContinuousDistribution,
......@@ -18,8 +20,13 @@ export # types
Gamma,
Geometric,
HyperGeometric,
IdentityLink,
InverseLink,
Laplace,
Link,
Logistic,
LogitLink,
LogLink,
logNormal,
MixtureModel,
Multinomial,
......@@ -32,11 +39,13 @@ export # types
Normal,
Pareto,
Poisson,
ProbitLink,
Rayleigh,
TDist,
Uniform,
Weibull,
# methods
canonicallink, # canonical link function for a distribution
ccdf, # complementary cdf, i.e. 1 - cdf
cdf, # cumulative distribution function
cquantile, # complementary quantile (i.e. using prob in right hand tail)
......@@ -46,20 +55,26 @@ export # types
invlogccdf, # complementary quantile based on log probability
invlogcdf, # quantile based on log probability
kurtosis, # kurtosis of the distribution
linkfun, # link function mapping mu to eta, the linear predictor
linkinv, # inverse link mapping eta to mu
logccdf, # ccdf returning log-probability
logcdf, # cdf returning log-probability
logpdf, # log probability density
logpmf, # log probability mass
mean, # mean of distribution
median, # median of distribution
mueta, # derivative of inverse link function
mustart, # starting values of mean vector in GLMs
pdf, # probability density function (ContinuousDistribution)
pmf, # probability mass function (DiscreteDistribution)
quantile, # inverse of cdf (defined for p in (0,1))
rand, # random sampler
rand!, # replacement random sampler
sample, # another random sampler - not sure why this is here
skewness, # skewness of the distribution
std, # standard deviation of distribution
valideta, # validity check on linear predictor
validmu, # validity check on mean vector
var # variance of distribution
import Base.mean, Base.median, Base.quantile, Base.rand, Base.std, Base.var, Base.integer_valued
......@@ -459,7 +474,7 @@ mean(d::Beta) = d.alpha / (d.alpha + d.beta)
var(d::Beta) = (ab = d.alpha + d.beta; d.alpha * d.beta /(ab * ab * (ab + 1.)))
skewness(d::Beta) = 2(d.beta - d.alpha)*sqrt(d.alpha + d.beta + 1)/((d.alpha + d.beta + 2)*sqrt(d.alpha*d.beta))
rand(d::Beta) = randbeta(d.alpha, d.beta)
rand!(d::Beta, A::Array{Float64}) = randbeta!(alpha, beta, A)
rand!(d::Beta, A::Array{Float64}) = randbeta!(d.alpha, d.beta, A)
insupport(d::Beta, x::Number) = real_valued(x) && 0 < x < 1
type BetaPrime <: ContinuousDistribution
......@@ -567,9 +582,7 @@ median(d::Exponential) = d.scale * log(2.)
var(d::Exponential) = d.scale * d.scale
skewness(d::Exponential) = 2.
kurtosis(d::Exponential) = 6.
function cdf(d::Exponential, q::Real)
q <= 0. ? 0. : -expm1(-q/d.scale)
end
cdf(d::Exponential, q::Real) = q <= 0. ? 0. : -expm1(-q/d.scale)
function logcdf(d::Exponential, q::Real)
q <= 0. ? -Inf : (qs = -q/d.scale; qs > log(0.5) ? log(-expm1(qs)) : log1p(-exp(qs)))
end
......@@ -1199,4 +1212,85 @@ function sample(a::Array)
sample(a, probs)
end
const minfloat = realmin(Float64)
const oneMeps = 1. - eps()
const llmaxabs = log(-log(minfloat))
abstract Link # Link types define linkfun, linkinv, mueta,
# valideta and validmu.
chkpositive(x::Real) = isfinite(x) && 0. < x ? x : error("argument must be positive")
chkfinite(x::Real) = isfinite(x) ? x : error("argument must be finite")
clamp01(x::Real) = clamp(x, minfloat, oneMeps)
chk01(x::Real) = 0. < x < 1. ? x : error("argument must be in (0,1)")
type CauchitLink <: Link end
linkfun (l::CauchitLink, mu::Real) = tan(pi * (mu - 0.5))
linkinv (l::CauchitLink, eta::Real) = 0.5 + atan(eta) / pi
mueta (l::CauchitLink, eta::Real) = 1. /(pi * (1 + eta * eta))
valideta(l::CauchitLink, eta::Real) = chkfinite(eta)
validmu (l::CauchitLink, mu::Real) = chk01(mu)
type CloglogLink <: Link end
linkfun (l::CloglogLink, mu::Real) = log(-log(1. - mu))
linkinv (l::CloglogLink, eta::Real) = -expm1(-exp(eta))
mueta (l::CloglogLink, eta::Real) = exp(eta) * exp(-exp(eta))
valideta(l::CloglogLink, eta::Real) = abs(eta) < llmaxabs? eta: error("require abs(eta) < $llmaxabs")
validmu (l::CloglogLink, mu::Real) = chk01(mu)
type IdentityLink <: Link end
linkfun (l::IdentityLink, mu::Real) = mu
linkinv (l::IdentityLink, eta::Real) = eta
mueta (l::IdentityLink, eta::Real) = 1.
valideta(l::IdentityLink, eta::Real) = chkfinite(eta)
validmu (l::IdentityLink, mu::Real) = chkfinite(mu)
type InverseLink <: Link end
linkfun (l::InverseLink, mu::Real) = 1. / mu
linkinv (l::InverseLink, eta::Real) = 1. / eta
mueta (l::InverseLink, eta::Real) = -1. / (eta * eta)
valideta(l::InverseLink, eta::Real) = chkpositive(eta)
validmu (l::InverseLink, eta::Real) = chkpositive(mu)
type LogitLink <: Link end
linkfun (l::LogitLink, mu::Real) = log(mu / (1 - mu))
linkinv (l::LogitLink, eta::Real) = 1. / (1. + exp(-eta))
mueta (l::LogitLink, eta::Real) = (e = exp(-abs(eta)); f = 1. + e; e / (f * f))
valideta(l::LogitLink, eta::Real) = chkfinite(eta)
validmu (l::LogitLink, mu::Real) = chk01(mu)
type LogLink <: Link end
linkfun (l::LogLink, mu::Real) = log(mu)
linkinv (l::LogLink, eta::Real) = exp(eta)
mueta (l::LogLink, eta::Real) = exp(eta)
valideta(l::LogLink, eta::Real) = chkfinite(eta)
validmu (l::LogLink, mu::Real) = chkfinite(mu)
type ProbitLink <: Link end
linkfun (l::ProbitLink, mu::Real) =
ccall(dlsym(_jl_libRmath, :qnorm5), Float64,
(Float64,Float64,Float64,Int32,Int32), mu, 0., 1., 1, 0)
linkinv (l::ProbitLink, eta::Real) = (1. + erf(eta/sqrt(2.))) / 2.
mueta (l::ProbitLink, eta::Real) = exp(-0.5eta^2) / sqrt(2.pi)
valideta(l::ProbitLink, eta::Real) = chkfinite(eta)
validmu (l::ProbitLink, mu::Real) = chk01(mu)
# Vectorized methods, including validity checks
function linkfun{T<:Real}(l::Link, mu::AbstractArray{T,1})
[linkfun(l, validmu(l, m)) for m in mu]
end
function linkinv{T<:Real}(l::Link, eta::AbstractArray{T,1})
[linkinv(l, valideta(l, et)) for et in eta]
end
function mueta{T<:Real}(l::Link, eta::AbstractArray{T,1})
[mueta(l, valideta(l, et)) for et in eta]
end
canonicallink(d::Gamma) = InverseLink()
canonicallink(d::Normal) = IdentityLink()
canonicallink(d::Bernoulli) = LogitLink()
canonicallink(d::Poisson) = LogLink()
end #module
require("pkg")
load("Distributions")
using Distributions
require("extras/nearequal.jl")
require("extras/test.jl")
# n probability points, i.e. the midpoints of the intervals [0, 1/n],...,[1-1/n, 1]
probpts(n::Int) = ((1:n) - 0.5)/n
pp = float(probpts(1000)) # convert from a Range{Float64}
......@@ -22,7 +24,7 @@ function reldiff{T<:Real}(current::AbstractArray{T}, target::AbstractArray{T})
@assert all(size(current) == size(target))
max([reldiff(current[i], target[i]) for i in 1:numel(target)])
end
## Checks on ContinuousDistribution instances
for d in (Beta(), Cauchy(), Chisq(12), Exponential(), Exponential(23.1),
FDist(2, 21), Gamma(3), Gamma(), Logistic(), logNormal(),
......@@ -63,9 +65,9 @@ logpmf(d, [1, 1])
logpmf(d, [0, 1])
d.n = 10
rand(d)
A = zeros(Int, 2, 10)
#A = zeros(Int, 2, 10)
#rand!(d, A)
A
#A
d = Dirichlet([1.0, 2.0, 1.0])
d = Dirichlet(3)
......@@ -78,9 +80,9 @@ insupport(d, [0.1, 0.8, 0.2])
insupport(d, [0.1, 0.8])
pdf(d, [0.1, 0.8, 0.1])
rand(d)
A = zeros(Float64, 10, 3)
rand!(d, A)
A
#A = zeros(Float64, 10, 3)
#rand!(d, A)
#A
d = Categorical([0.25, 0.5, 0.25])
d = Categorical(3)
......@@ -103,9 +105,9 @@ d = Categorical([0.25; 0.5; 0.25])
@assert 1.0 <= rand(d) <= 3.0
A = zeros(Int, 10)
#A = zeros(Int, 10)
#rand!(d, A)
@assert 1.0 <= mean(A) <= 3.0
#@assert 1.0 <= mean(A) <= 3.0
# Examples of sample()
a = [1, 6, 19]
......@@ -117,3 +119,34 @@ x = sample(a, p)
#a = 19.0 * eye(2)
#x = sample(a)
#@assert x == 0.0 || x == 19.0
## Link function tests
const ep = eps()
const oneMeps = 1 - ep
srand(1)
etas = (linspace(-7., 7., 15), # equal spacing to asymptotic area
14 * rand(17) - 7, # random sample from wide uniform dist
clamp(rand(Normal(0, 4), 17), -7., 7.), # random sample from wide normal dist
[-7., rand(Normal(0, 4),15), 7.])
## sample linear predictor values for the families in which eta must be positive
etapos = (float64(1:20), rand(Exponential(), 20), rand(Gamma(3), 20), max(ep, rand(Normal(2.), 20)))
## values of mu in the (0,1) interval
mubinom = (rand(100), rand(Beta(1,3), 100),
[ccall((:rbeta, :libRmath), Float64, (Float64,Float64), 0.1, 3) for i in 1:100],
[ccall((:rbeta, :libRmath), Float64, (Float64,Float64), 3, 0.1) for i in 1:100])
for ll in (LogitLink(), ProbitLink()#, CloglogLink() # edge cases for CloglogLink are tricky
, CauchitLink())
# println(ll) # Having problems with the edge when eta is very large or very small
# for i in 1:size(etas,1)
# println(i)
# @assert all(isapprox(linkfun(ll, clamp(linkinv(ll, etas[i]), realmin(Float64), 1.-eps())), etas[i]))
# end
for mu in mubinom
mm = clamp(mu, realmin(), oneMeps)
@assert all(isapprox(linkinv(ll, linkfun(ll, mm)), mm))
end
end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment