Ce serveur Gitlab sera éteint le 30 juin 2020, pensez à migrer vos projets vers les serveurs gitlab-research.centralesupelec.fr et gitlab-student.centralesupelec.fr !

Commit 87eca10b by John Myles White

### Code cleanup and reorganization

parent 164c8437
This diff is collapsed.
 ... ... @@ -41,7 +41,8 @@ function DiscreteDistributionTable{T <: Real}(probs::Vector{T}) counts = Array(Int64, 0) for i in 1:n digit = mod(vals[i], 64) vals[i] = fld(vals[i], 64) # vals[i] = fld(vals[i], 64) vals[i] >>= 6 bounds[index] += digit for itr in 1:digit push!(counts, i) ... ... @@ -49,7 +50,8 @@ function DiscreteDistributionTable{T <: Real}(probs::Vector{T}) end bounds[index] *= multiplier table[index] = counts multiplier *= 64 # multiplier *= 64 multiplier <<= 6 end # Make bounds cumulative ... ... @@ -59,10 +61,11 @@ function DiscreteDistributionTable{T <: Real}(probs::Vector{T}) end function draw(table::DiscreteDistributionTable) i = rand(1:64^9) if i == 64^9 return table.table[9][rand(1:length(table.table[9]))] end # 64^9 - 1 == 0x003fffffffffffff i = rand(1:(64^9 - 1)) # if i == 64^9 # return table.table[9][rand(1:length(table.table[9]))] # end bound = 1 while i > table.bounds[bound] && bound < 9 bound += 1 ... ...
src/fallbacks.jl 0 → 100644
 ############################################################################## # # Fallback methods, usually overridden for specific distributions # ############################################################################## binaryentropy(d::Distribution) = entropy(d) / log(2) ccdf(d::UnivariateDistribution, q::Real) = 1.0 - cdf(d, q) cquantile(d::UnivariateDistribution, p::Real) = quantile(d, 1.0 - p) function deviance{M<:Real,Y<:Real,W<:Real}(d::Distribution, mu::AbstractArray{M}, y::AbstractArray{Y}, wt::AbstractArray{W}) promote_shape(size(mu), promote_shape(size(y), size(wt))) ans = 0.0 for i in 1:length(y) ans += wt[i] * logpdf(d, mu[i], y[i]) end return -2.0 * ans end function devresid(d::Distribution, y::Real, mu::Real, wt::Real) return -2.0 * wt * logpdf(d, mu, y) end function devresid{Y<:Real,M<:Real,W<:Real}(d::Distribution, y::AbstractArray{Y}, mu::AbstractArray{M}, wt::AbstractArray{W}) R = Array(Float64, promote_shape(size(y), promote_shape(size(mu), size(wt)))) for i in 1:length(mu) R[i] = devresid(d, y[i], mu[i], wt[i]) end return R end function devresid(d::Distribution, y::Vector{Float64}, mu::Vector{Float64}, wt::Vector{Float64}) return [devresid(d, y[i], mu[i], wt[i]) for i in 1:length(y)] end function insupport(d::Distribution, x::AbstractArray) for e in x if !insupport(d, e) return false end end return true end invlogccdf(d::Distribution, lp::Real) = quantile(d, -expm1(lp)) invlogcdf(d::Distribution, lp::Real) = quantile(d, exp(lp)) # kurtosis returns excess kurtosis by default # proper kurtosis can be returned with correction = false function kurtosis(d::Distribution, correction::Bool) if correction return kurtosis(d) else return kurtosis(d) + 3.0 end end excess(d::Distribution) = kurtosis(d) excess_kurtosis(d::Distribution) = kurtosis(d) proper_kurtosis(d::Distribution) = kurtosis(d, false) logcdf(d::Distribution, q::Real) = log(cdf(d,q)) logccdf(d::Distribution, q::Real) = log(ccdf(d,q)) logpdf(d::Distribution, x::Real) = log(pdf(d,x)) 
# Fill r with elementwise logpdf values of x; shapes must agree.
function logpdf!(r::AbstractArray, d::UnivariateDistribution, x::AbstractArray)
    size(x) == size(r) || throw(ArgumentError("Inconsistent array dimensions."))
    for j in 1:length(x)
        r[j] = logpdf(d, x[j])
    end
end

# Column-wise logpdf of a multivariate distribution: one value per column.
function logpdf(d::MultivariateDistribution, x::AbstractMatrix)
    ncols::Int = size(x, 2)
    out = Array(Float64, ncols)
    for j in 1:ncols
        out[j] = logpdf(d, x[:, j])
    end
    out
end

# In-place column-wise logpdf; r must have one slot per column of x.
function logpdf!(r::AbstractArray, d::MultivariateDistribution, x::AbstractMatrix)
    ncols::Int = size(x, 2)
    length(r) == ncols || throw(ArgumentError("Inconsistent array dimensions."))
    for j in 1:ncols
        r[j] = logpdf(d, x[:, j])
    end
end

# For discrete distributions the pmf coincides with the pdf.
logpmf(d::DiscreteDistribution, args::Any...) = logpdf(d, args...)

function logpmf!(r::AbstractArray, d::DiscreteDistribution, args::Any...)
    return logpdf!(r, d, args...)
end

pmf(d::DiscreteDistribution, args::Any...) = pdf(d, args...)

# Elementwise starting values for GLM-style mean estimates.
function mustart{Y<:Real,W<:Real}(d::Distribution,
                                  y::AbstractArray{Y},
                                  wt::AbstractArray{W})
    starts = Array(Float64, promote_shape(size(y), size(wt)))
    for j in 1:length(starts)
        starts[j] = mustart(d, y[j], wt[j])
    end
    return starts
end

# Fill A with independent draws from d.
function rand!(d::UnivariateDistribution, A::Array)
    for j in 1:length(A)
        A[j] = rand(d)
    end
    return A
end

rand(d::ContinuousDistribution, dims::Dims) = rand!(d, Array(Float64, dims))

rand(d::DiscreteDistribution, dims::Dims) = rand!(d, Array(Int, dims))

# Varargs convenience form: normalize the dimensions to Ints.
rand(d::NonMatrixDistribution, dims::Integer...) = rand(d, map(int, dims))

# A single integer means "that many draws", one column per component of d.
rand(d::MultivariateDistribution, dims::Integer) = rand(d, (dims, length(mean(d))))

rand(d::MatrixDistribution, dims::Integer) = rand!(d, Array(Matrix{Float64}, int(dims)))

# Fill the matrix X with draws: one per column when the column length
# matches the distribution's dimension, otherwise one per row.
function rand!(d::MultivariateDistribution, X::Matrix)
    dim = length(mean(d))
    nrows, ncols = size(X)
    if nrows == dim
        for j in 1:ncols
            X[:, j] = rand(d)
        end
    elseif ncols == dim
        for j in 1:nrows
            X[j, :] = rand(d)
        end
    else
        error("Wrong dimensions")
    end
    return X
end

# Fill an array of matrices with independent draws.
function rand!(d::MatrixDistribution, X::Array{Matrix{Float64}})
    for j in 1:length(X)
        X[j] = rand(d)
    end
    return X
end

# Sparse random matrix whose stored entries are draws from d.
function sprand(m::Integer, n::Integer, density::Real, d::Distribution)
    return sprand(m, n, density, k -> rand(d, k))
end

std(d::Distribution) = sqrt(var(d))

# Elementwise variance evaluated at each mean value in mu.
function var{M <: Real}(d::Distribution, mu::AbstractArray{M})
    out = similar(mu, Float64)
    for j in 1:length(mu)
        out[j] = var(d, mu[j])
    end
    return out
end

# Generate vectorized (array-argument) methods for the univariate
# evaluation functions.
for fname in (:pdf, :logpdf, :cdf, :logcdf, :ccdf, :logccdf,
              :quantile, :cquantile, :invlogcdf, :invlogccdf)
    @eval begin
        function ($fname)(d::UnivariateDistribution, x::AbstractArray)
            vals = Array(Float64, size(x))
            for j in 1:length(vals)
                vals[j] = ($fname)(d, x[j])
            end
            return vals
        end
    end
end
 function digamma(x::Float64) # Use Int instead? ccall(("digamma", :libRmath), Float64, (Float64, ), x) ccall(("digamma", :libRmath), Float64, (Float64, ), x) end function trigamma(x::Float64) # Use Int instead? ccall(("trigamma", :libRmath), Float64, (Float64, ), x) ccall(("trigamma", :libRmath), Float64, (Float64, ), x) end function fit(::Type{Bernoulli}, x::Array) for i in 1:length(x) if !insupport(Bernoulli(), x[i]) error("Bernoulli observations must be 0/1 values") for i in 1:length(x) if !insupport(Bernoulli(), x[i]) error("Bernoulli observations must be 0/1 values") end end end return Bernoulli(mean(x)) return Bernoulli(mean(x)) end function fit(::Type{Beta}, x::Array) for i in 1:length(x) if !insupport(Beta(), x[i]) error("Bernoulli observations must be in [0,1]") for i in 1:length(x) if !insupport(Beta(), x[i]) error("Bernoulli observations must be in [0,1]") end end end x_bar = mean(x) v_bar = var(x) a = x_bar * (((x_bar * (1. - x_bar)) / v_bar) - 1.) b = (1. - x_bar) * (((x_bar * (1. - x_bar)) / v_bar) - 1.) return Beta(a, b) x_bar = mean(x) v_bar = var(x) a = x_bar * (((x_bar * (1.0 - x_bar)) / v_bar) - 1.0) b = (1.0 - x_bar) * (((x_bar * (1.0 - x_bar)) / v_bar) - 1.0) return Beta(a, b) end function fit(::Type{Binomial}, x::Real, n::Real) if x > n || x < 0 error("For binomial observations, x must lie in [0, n]") end return Binomial(int(n), x / n) if x > n || x < 0 error("For binomial observations, x must lie in [0, n]") end return Binomial(int(n), x / n) end # Categorical ... ... 
# Dirichlet

# MLE for DiscreteUniform: the observed range.
function fit(::Type{DiscreteUniform}, x::Array)
    DiscreteUniform(min(x), max(x))
end

# MLE for Exponential: the sample mean.
function fit(::Type{Exponential}, x::Array)
    for i in 1:length(x)
        if !insupport(Exponential(), x[i])
            error("Exponential observations must be non-negative values")
        end
    end
    return Exponential(mean(x))
end

# FDist

# MLE for Gamma via Newton iteration on the shape parameter
# (generalized Newton method of Minka's "Estimating a Gamma distribution").
function fit(::Type{Gamma}, x::Array)
    mx = mean(x)
    mlx = mean(log(x))
    a_old = 0.5 / (log(mx) - mlx)
    # Take one explicit Newton step before the loop so the convergence
    # test compares two distinct iterates (previously a_old == a_new at
    # entry, so the loop body never ran).
    z = 1.0 / a_old + (mlx - log(mx) + log(a_old) - digamma(a_old)) /
        (a_old^2 * (1.0 / a_old - trigamma(a_old)))
    a_new = 1.0 / z
    iterations = 1
    while abs(a_old - a_new) > 1e-16 && iterations <= 10_000
        a_old = a_new
        z = 1.0 / a_old + (mlx - log(mx) + log(a_old) - digamma(a_old)) /
            (a_old^2 * (1.0 / a_old - trigamma(a_old)))
        a_new = 1.0 / z
        # Previously never incremented, so the cap was never reached.
        iterations += 1
    end
    if iterations > 10_000
        error("Parameter estimation failed to converge in 10,000 iterations")
    end
    Gamma(a_new, mx / a_new)
end

# MLE for Geometric: reciprocal of the mean of integer observations.
function fit(::Type{Geometric}, x::Array)
    Geometric(1.0 / mean(convert(Array{Int}, x)))
end

# HyperGeometric
# Logistic

# MLE for Laplace: median location, mean absolute deviation scale.
function fit(::Type{Laplace}, x::Array)
    a = median(x)
    deviations = 0.0
    for i in 1:length(x)
        deviations += abs(x[i] - a)
    end
    b = deviations / length(x)
    Laplace(a, b)
end

# logNormal

# Normal
# Not strict MLE (uses the Bessel-corrected std rather than the MLE std).
function fit(::Type{Normal}, x::Array)
    Normal(mean(x), std(x))
end

# Poisson
# MLE for Poisson: the sample mean of non-negative integer counts.
function fit(::Type{Poisson}, x::Array)
    for i in 1:length(x)
        if !insupport(Poisson(), x[i])
            error("Poisson observations must be non-negative integers")
        end
    end
    Poisson(mean(x))
end

# TDist

# Uniform
# MLE for Uniform: the observed range.
function fit(::Type{Uniform}, x::Array)
    Uniform(min(x), max(x))
end

# Weibull
 function getEndpoints(distr::UnivariateDistribution, epsilon::Real) (left,right) = map(x -> quantile(distr,x), (0,1)) leftEnd = left!=-Inf ? left : quantile(distr, epsilon) rightEnd = right!=-Inf ? right : quantile(distr, 1-epsilon) (leftEnd, rightEnd) end function expectation(distr::ContinuousUnivariateDistribution, g::Function, epsilon::Real) f = x->pdf(distr,x) (leftEnd, rightEnd) = getEndpoints(distr, epsilon) integrate(x -> f(x)*g(x), leftEnd, rightEnd) end ## Assuming that discrete distributions only take integer values. function expectation(distr::DiscreteUnivariateDistribution, g::Function, epsilon::Real) f = x->pmf(distr,x) (leftEnd, rightEnd) = getEndpoints(distr, epsilon) sum(map(x -> f(x)*g(x), leftEnd:rightEnd)) end function expectation(distr::UnivariateDistribution, g::Function) expectation(distr, g, 1e-10) end function entropy(distr::UnivariateDistribution) pf = typeof(distr)<:ContinuousDistribution ? pdf : pmf f = x -> pf(distr, x) expectation(distr, x -> -log(f(x))) end function KL(P::UnivariateDistribution, Q::UnivariateDistribution) expectation(P, x -> log(pdf(P,x)/pdf(Q,x))) end
src/glmtools.jl 0 → 100644
 const minfloat = realmin(Float64) const oneMeps = 1. - eps() const llmaxabs = log(-log(minfloat)) const logeps = log(eps()) abstract Link # Link types define linkfun, linkinv, mueta, # valideta and validmu. chkpositive(x::Real) = isfinite(x) && 0. < x ? x : error("argument must be positive") chkfinite(x::Real) = isfinite(x) ? x : error("argument must be finite") clamp01(x::Real) = clamp(x, minfloat, oneMeps) chk01(x::Real) = 0. < x < 1. ? x : error("argument must be in (0,1)") type CauchitLink <: Link end linkfun (l::CauchitLink, mu::Real) = tan(pi * (mu - 0.5)) linkinv (l::CauchitLink, eta::Real) = 0.5 + atan(eta) / pi mueta (l::CauchitLink, eta::Real) = 1. /(pi * (1 + eta * eta)) valideta(l::CauchitLink, eta::Real) = chkfinite(eta) validmu (l::CauchitLink, mu::Real) = chk01(mu) type CloglogLink <: Link end linkfun (l::CloglogLink, mu::Real) = log(-log(1. - mu)) linkinv (l::CloglogLink, eta::Real) = -expm1(-exp(eta)) mueta (l::CloglogLink, eta::Real) = exp(eta) * exp(-exp(eta)) valideta(l::CloglogLink, eta::Real) = abs(eta) < llmaxabs? eta: error("require abs(eta) < \$llmaxabs") validmu (l::CloglogLink, mu::Real) = chk01(mu) type IdentityLink <: Link end linkfun (l::IdentityLink, mu::Real) = mu linkinv (l::IdentityLink, eta::Real) = eta mueta (l::IdentityLink, eta::Real) = 1. valideta(l::IdentityLink, eta::Real) = chkfinite(eta) validmu (l::IdentityLink, mu::Real) = chkfinite(mu) type InverseLink <: Link end linkfun (l::InverseLink, mu::Real) = 1. / mu linkinv (l::InverseLink, eta::Real) = 1. / eta mueta (l::InverseLink, eta::Real) = -1. / (eta * eta) valideta(l::InverseLink, eta::Real) = chkpositive(eta) validmu (l::InverseLink, eta::Real) = chkpositive(mu) type LogitLink <: Link end linkfun (l::LogitLink, mu::Real) = log(mu / (1 - mu)) linkinv (l::LogitLink, eta::Real) = 1. / (1. + exp(-eta)) mueta (l::LogitLink, eta::Real) = (e = exp(-abs(eta)); f = 1. 
+ e; e / (f * f)) valideta(l::LogitLink, eta::Real) = chkfinite(eta) validmu (l::LogitLink, mu::Real) = chk01(mu) type LogLink <: Link end linkfun (l::LogLink, mu::Real) = log(mu) linkinv (l::LogLink, eta::Real) = exp(eta) mueta (l::LogLink, eta::Real) = eta < logeps ? eps() : exp(eta) valideta(l::LogLink, eta::Real) = chkfinite(eta) validmu (l::LogLink, mu::Real) = chkpositive(mu) type ProbitLink <: Link end linkfun (l::ProbitLink, mu::Real) = ccall((:qnorm5, Rmath), Float64, (Float64,Float64,Float64,Int32,Int32), mu, 0., 1., 1, 0) linkinv (l::ProbitLink, eta::Real) = (1. + erf(eta/sqrt(2.))) / 2. mueta (l::ProbitLink, eta::Real) = exp(-0.5eta^2) / sqrt(2.pi) valideta(l::ProbitLink, eta::Real) = chkfinite(eta) validmu (l::ProbitLink, mu::Real) = chk01(mu) # Vectorized methods, including validity checks function linkfun{T<:Real}(l::Link, mu::AbstractArray{T,1}) [linkfun(l, validmu(l, m)) for m in mu] end function linkinv{T<:Real}(l::Link, eta::AbstractArray{T,1}) [linkinv(l, valideta(l, et)) for et in eta] end function mueta{T<:Real}(l::Link, eta::AbstractArray{T,1}) [mueta(l, valideta(l, et)) for et in eta] end canonicallink(d::Gamma) = InverseLink() canonicallink(d::Normal) = IdentityLink() canonicallink(d::Bernoulli) = LogitLink() canonicallink(d::Poisson) = LogLink()
 ############################################################################## # # Inverse Wishart Distribution # # Parameterized such that E(X) = Psi / (nu - p - 1) # See the riwish and diwish function of R's MCMCpack # ############################################################################## immutable InverseWishart <: ContinuousMatrixDistribution nu::Float64 Psichol::Cholesky{Float64} function InverseWishart(n::Real, Pc::Cholesky{Float64}) if n > size(Pc, 1) - 1 new(float64(n), Pc) else error("Inverse Wishart parameters must be df > p - 1") end end end function InverseWishart(nu::Real, Psi::Matrix{Float64}) InverseWishart(float64(nu), cholfact(Psi)) end function insupport(IW::InverseWishart, X::Matrix{Float64}) return size(X, 1) == size(X, 2) && isApproxSymmmetric(X) && size(X, 1) == size(IW.Psichol, 1) && hasCholesky(X) end function mean(IW::InverseWishart) if IW.nu > size(IW.Psichol, 1) + 1 return 1.0 / (IW.nu - size(IW.Psichol, 1) - 1.0) * IW.Psichol[:U]' * IW.Psichol[:U] else error("mean only defined for nu > p + 1") end end pdf(IW::InverseWishart, X::Matrix{Float64}) = exp(logpdf(IW, X)) function logpdf(IW::InverseWishart, X::Matrix{Float64}) if !insupport(IW, X) return -Inf else p = size(X, 1) logd::Float64 = IW.nu * p / 2.0 * log(2.0) logd += lpgamma(p, IW.nu / 2.0) logd -= IW.nu / 2.0 * logdet(IW.Psichol) logd = -logd logd -= 0.5 * (IW.nu + p + 1.0) * logdet(X) logd -= 0.5 * trace(X \ (IW.Psichol[:U]' * IW.Psichol[:U])) return logd end end # rand(Wishart(nu, Psi^-1))^-1 is an sample from an # inverse wishart(nu, Psi). 
there is actually some wacky # behavior here where inv of the Cholesky returns the # inverse of the original matrix, in this case we're getting # Psi^-1 like we want rand(IW::InverseWishart) = inv(rand(Wishart(IW.nu, inv(IW.Psichol)))) function rand!(IW::InverseWishart, X::Array{Matrix{Float64}}) Psiinv = inv(IW.Psichol) W = Wishart(IW.nu, Psiinv) X = rand!(W, X) for i in 1:length(X) X[i] = inv(X[i]) end return X end var(IW::InverseWishart) = error("Not yet implemented") # because X == X' keeps failing due to floating point nonsense function isApproxSymmmetric(a::Matrix{Float64}) tmp = true for j in 2:size(a, 1) for i in 1:(j - 1) tmp &= abs(a[i, j] - a[j, i]) < 1e-8 end end return tmp end # because isposdef keeps giving the wrong answer for samples # from Wishart and InverseWisharts function hasCholesky(a::Matrix{Float64}) try achol = cholfact(a) catch e return false end return true end
 ############################################################################## # # Wishart Distribution # # Parameters nu and S such that E(X) = nu * S # See the rwish and dwish implementation in R's MCMCPack # This parametrization differs from Bernardo & Smith p 435 # in this way: (nu, S) = (2.0 * alpha, 0.5 * beta^-1) # ############################################################################## immutable Wishart <: ContinuousMatrixDistribution nu::Float64 Schol::Cholesky{Float64} function Wishart(n::Real, Sc::Cholesky{Float64}) if n > size(Sc, 1) - 1 new(float64(n), Sc) else error("Wishart parameters must be df > p - 1") end end end Wishart(nu::Real, S::Matrix{Float64}) = Wishart(nu, cholfact(S)) function insupport(W::Wishart, X::Matrix{Float64}) return size(X, 1) == size(X, 2) && isApproxSymmmetric(X) && size(X, 1) == size(W.Schol, 1) && hasCholesky(X) end mean(w::Wishart) = w.nu * w.Schol[:U]' * w.Schol[:U] pdf(W::Wishart, X::Matrix{Float64}) = exp(logpdf(W, X)) function logpdf(W::Wishart, X::Matrix{Float64}) if !insupport(W, X) return -Inf else p = size(X, 1) logd::Float64 = W.nu * p / 2.0 * log(2.0) logd += W.nu / 2.0 * log(det(W.Schol)) logd += lpgamma(p, W.nu / 2.0) logd = -logd logd += 0.5 * (W.nu - p - 1.0) * logdet(X) logd -= 0.5 * trace(W.Schol \ X) return logd end end function rand(w::Wishart) p = size(w.Schol, 1) X = zeros(p, p) for ii in 1:p X[ii, ii] = sqrt(rand(Chisq(w.nu - ii + 1))) end if p > 1 for col in 2:p for row in 1:(col - 1) X[row, col] = randn() end end end Z = X * w.Schol[:U] return Z' * Z end