Ce serveur Gitlab sera éteint le 30 juin 2020, pensez à migrer vos projets vers les serveurs gitlab-research.centralesupelec.fr et gitlab-student.centralesupelec.fr !

Commit bcd1cdc1 authored by Dahua Lin

rewrite mixture models (not tested yet)

parent 95cee4da
......@@ -141,6 +141,7 @@ export
cquantile, # complementary quantile (i.e. using prob in right hand tail)
cumulant, # cumulants of distribution
complete, # turn an incomplete formulation into a complete distribution
components, # get components from a mixture model
concentration, # the concentration parameter
dim, # sample dimension of multivariate distribution
entropy, # entropy of distribution in nats
......
# NOTE(review): this span is a diff artifact — the old and the rewritten
# `MixtureModel` type definitions are interleaved with the +/- markers lost.
# Old version: stores components, normalized probs, and an AliasTable.
# New version (second `immutable` header): constrains the type parameters and
# stores a `Categorical` prior instead (see the trailing `prior::Categorical`).
immutable MixtureModel{VF,VS,Component<:Distribution} <: Distribution{VF,VS}
# finite mixture models
immutable MixtureModel{VF<:VariateForm, VS<:ValueSupport, Component<:Distribution} <: Distribution{VF,VS}
components::Vector{Component}
probs::Vector{Float64}
aliastable::AliasTable
# Inner constructor (old version): validates component/probability
# consistency, normalizes the weights, and builds an alias table for sampling.
function MixtureModel(c::Vector{Component}, p::Vector{Float64})
# Reject instantiations where Component does not match the declared
# variate form / value support type parameters.
if !(Component <: Distribution{VF,VS})
throw(TypeError(:MixtureModel,
"Mixture components type mismatch",
Distribution{VF,VS},
Component))
end
if length(c) != length(p)
throw(ArgumentError(string("components and probs must have",
" the same number of elements")))
end
# All mixture components must share the same sample dimensions.
sizes = [size(component)::Tuple for component in c]
if !all(sizes .== sizes[1])
error("MixtureModel: mixture components have different dimensions")
end
# Weights must be non-negative; they are normalized to sum to one below.
sump = 0.0
for i in 1:length(p)
if p[i] < 0.0
error("MixtureModel: probabilities must be non-negative")
end
sump += p[i]
end
table = AliasTable(p ./ sump)
new(c, p ./ sump, table)
end
# Field from the rewritten struct: mixing weights held as a Categorical prior.
prior::Categorical
end
# NOTE(review): diff artifact — the old outer-constructor header is interleaved
# with the rewritten typealiases and the new `MixtureModel(components, prior)`.
function MixtureModel{C<:Distribution}(cs::Vector{C}, ps::Vector{Float64})
# Convenience aliases by variate form.
typealias UnivariateMixture MixtureModel{Univariate}
typealias MultivariateMixture MixtureModel{Multivariate}
typealias MatrixvariateMixture MixtureModel{Matrixvariate}
#### Constructors
function MixtureModel{C<:Distribution}(components::Vector{C}, prior::Categorical)
# NOTE(review): `complements` on the next line is almost certainly a typo
# for `components` — as written it raises UndefVarError at runtime.
length(complements) == ncategories(prior) ||
error("Inconsistent sizes of components and prior.")
# Derive the type parameters from the component element type.
VF = variate_form(C)
VS = value_support(C)
MixtureModel{VF,VS,C}(cs, ps)
MixtureModel{VF,VS,C}(components, prior)
end
# Old version: sample dimension delegates to the first component
# (all components were validated to have equal dimensions).
dim(d::MixtureModel{Multivariate}) = dim(d.components[1])
# Construct a mixture from components plus a raw probability vector,
# wrapping the weights into a Categorical prior.
MixtureModel{C<:Distribution}(components::Vector{C}, p::Vector{Float64}) =
MixtureModel(components, Categorical(p))
# all components have the same prior probabilities
MixtureModel{C<:Distribution}(components::Vector{C}) =
MixtureModel(components, Categorical(length(components)))
#### Basic properties
# Dimensions are taken from the first component (all components share them).
length(d::MultivariateMixture) = dim(d.components[1])
size(d::MatrixvariateMixture) = size(d.components[1])
# NOTE(review): diff artifact — the old `mean(d::MixtureModel)` and the
# rewritten `mean(d::UnivariateMixture)` are interleaved below.
function mean(d::MixtureModel)
# Accessor for the component distributions.
components(d::MixtureModel) = d.components
function mean(d::UnivariateMixture)
cs = d.components
p = d.prior
m = 0.0
# Accumulate the prior-weighted component means; components with zero
# prior probability are skipped.
for i in 1:length(d.components)
if d.probs[i] > 0.
m += mean(d.components[i]) * d.probs[i]
for i = 1:length(cs)
pi = p[i]
if pi > 0.0
m += mean(cs[i]) * pi
end
end
return m
end
# NOTE(review): diff artifact — the old `_mixpdf` and the rewritten
# `mean(d::MultivariateMixture)` are interleaved below.
function _mixpdf(d::MixtureModel, x)
p = 0.0
# Old version: prior-weighted sum of component densities at x.
for i in 1:length(d.components)
p += pdf(d.components[i], x) * d.probs[i]
function mean(d::MultivariateMixture)
cs = d.components
p = d.prior
# Accumulate prior-weighted component mean vectors into m.
m = zeros(length(d))
for i = 1:length(cs)
pi = p[i]
if pi > 0.0
# m += pi * mean(cs[i]) without allocating a temporary
BLAS.axpy!(pi, mean(cs[i]), m)
end
end
return p
return m
end
# NOTE(review): diff artifact — the old `_mixlogpdf` and the rewritten
# `var(d::UnivariateMixture)` are interleaved below.
function _mixlogpdf(d::MixtureModel, x)
# Old version: log-sum-exp over the positive-probability components.
l = [(logpdf(d.components[i],x)+log(d.probs[i]))::Float64
for i in find(d.probs .> 0.)]
m = maximum(l)
log(sum(exp(l.-m))) + m
function var(d::UnivariateMixture)
cs = d.components
p = d.prior
K = length(cs)
means = Array(Float64, K)
m = 0.0
v = 0.0
# First pass: mixture mean and prior-weighted within-component variance.
for i = 1:K
pi = p[i]
if pi > 0.0
ci = cs[i]
means[i] = mi = mean(ci)
m += pi * mi
v += pi * var(ci)
end
end
# Second pass: add the between-component term (law of total variance).
for i = 1:K
pi = p[i]
if pi > 0.0
v += pi * abs2(means[i] - m)
end
end
return v
end
# NOTE(review): the four methods below belong to the removed version. The
# multivariate `_logpdf` delegates to `_mixpdf` rather than `_mixlogpdf`,
# which looks like a pre-existing bug in the old code.
pdf(d::MixtureModel{Univariate}, x::Real) = _mixpdf(d, x)
logpdf(d::MixtureModel{Univariate}, x::Real) = _mixlogpdf(d, x)
_pdf(d::MixtureModel{Multivariate}, x::AbstractVector) = _mixpdf(d, x)
_logpdf(d::MixtureModel{Multivariate}, x::AbstractVector) = _mixpdf(d, x)
#### Evaluation
# NOTE(review): diff artifact — the old `rand(d::MixtureModel)` and the
# rewritten `_mixpdf1` are interleaved below.
function rand(d::MixtureModel)
# Old version: pick a component via the alias table, then sample from it.
i = rand(d.aliastable)
return rand(d.components[i])
function _mixpdf1(d::MixtureModel, x)
cs = d.components
p = d.prior
v = 0.0
# Prior-weighted sum of component densities at a single point x.
for i = 1:length(cs)
pi = p[i]
if pi > 0.0
v += pdf(cs[i], x) * pi
end
end
return v
end
# NOTE(review): diff artifact — the old `var(d::MixtureModel)` and the
# rewritten batch evaluator `_mixpdf!` are interleaved below.
function var(d::MixtureModel)
m = 0.0
squared_mean_mixture = mean(d).^2
for i in 1:length(d.components)
if d.probs[i] > 0.
m += (var(d.components[i]) .- squared_mean_mixture .+ mean(d.components[i]).^2) * d.probs[i]
function _mixpdf!(r::DenseArray, d::MixtureModel, x)
cs = d.components
p = d.prior
# r accumulates the prior-weighted component pdfs for a batch of samples.
fill!(r, 0.0)
# Scratch buffer reused for each component's batch pdf evaluation.
t = Array(Float64, size(r))
for i = 1:length(cs)
pi = p[i]
if pi > 0.0
# compute pdf in batch
pdf!(t, cs[i], x)
# accumulate pdf in batch
BLAS.axpy!(pi, t, r)
end
end
return m
return r
end
# Old-version fallbacks: dimensions delegate to the first component.
size(d::MixtureModel) = size(d.components[1])
length(d::MixtureModel) = length(d.components[1])
function _mixlogpdf1(d::MixtureModel, x)
    # Numerically stable evaluation of the mixture log-density at x via the
    # log-sum-exp trick:
    #
    #   logpdf(d, x) = log(sum_i pri[i] * pdf(cs[i], x))
    #                = mmax + log(sum_i exp(log(pri[i]) + logpdf(cs[i], x) - mmax))
    #
    # where mmax is the largest per-component log-term, keeping the arguments
    # of exp() in a safe range.
    comps = d.components
    pri = d.prior
    ncomp = length(comps)
    lterm = Array(Float64, ncomp)
    mmax = -Inf  # running maximum of log(pri[k]) + logpdf(comps[k], x)
    for k = 1:ncomp
        pk = pri[k]
        if pk > 0.0
            # lterm[k] <- logpdf(comps[k], x) + log(pri[k])
            lv = logpdf(comps[k], x) + log(pk)
            lterm[k] = lv
            mmax = max(mmax, lv)
        end
    end
    # Sum the shifted exponentials of the positive-prior terms.
    acc = 0.0
    for k = 1:ncomp
        if pri[k] > 0.0
            acc += exp(lterm[k] - mmax)
        end
    end
    return mmax + log(acc)
end
# Batch counterpart of _mixlogpdf1: writes the mixture log-density of each
# sample in `x` into `r`, using a per-sample log-sum-exp for stability.
function _mixlogpdf!(r::DenseArray, d::MixtureModel, x)
    cs = d.components
    p = d.prior
    K = length(cs)
    n = length(r)
    Lp = Array(Float64, n, K)  # Lp[j,i] = log(p[i]) + logpdf(cs[i], x_j)
    m = fill(-Inf, n)          # per-sample maximum of the log-terms
    for i = 1:K
        pi = p[i]
        if pi > 0.0
            lpri = log(pi)
            lp_i = view(Lp, :, i)
            # compute logpdf in batch and store
            logpdf!(lp_i, cs[i], x)
            # in the mean time, add log(prior) to lp and
            # update the maximum for each sample
            for j = 1:n
                lp_i[j] += lpri
                if lp_i[j] > m[j]
                    m[j] = lp_i[j]
                end
            end
        end
    end
    fill!(r, 0.0)
    for i = 1:K
        if p[i] > 0.0
            # BUG FIX: the original read `lp_i` here, which was still bound to
            # the view of the *last* positive-prior column from the loop above;
            # each component must accumulate its own column of Lp.
            lp_i = view(Lp, :, i)
            for j = 1:n
                r[j] += exp(lp_i[j] - m[j])
            end
        end
    end
    # Undo the shift: log(sum exp(l - m)) + m.
    for j = 1:n
        r[j] = log(r[j]) + m[j]
    end
    return r
end
# Evaluation entry points: scalar methods delegate to the *1 helpers,
# batch methods to the in-place *! helpers.
pdf(d::UnivariateMixture, x::Real) = _mixpdf1(d, x)
logpdf(d::UnivariateMixture, x::Real) = _mixlogpdf1(d, x)
_pdf!(r::AbstractArray, d::UnivariateMixture, x::DenseArray) = _mixpdf!(r, d, x)
_logpdf!(r::AbstractArray, d::UnivariateMixture, x::DenseArray) = _mixlogpdf!(r, d, x)
_pdf(d::MultivariateMixture, x::AbstractVector) = _mixpdf1(d, x)
_logpdf(d::MultivariateMixture, x::AbstractVector) = _mixlogpdf1(d, x)
_pdf!(r::AbstractArray, d::MultivariateMixture, x::DenseMatrix) = _mixpdf!(r, d, x)
# BUG FIX: was misspelled `_lodpdf!`, so batch logpdf evaluation for
# multivariate mixtures never dispatched to the mixture implementation.
_logpdf!(r::AbstractArray, d::MultivariateMixture, x::DenseMatrix) = _mixlogpdf!(r, d, x)
## Sampling

# Sampler for mixture models: one pre-built sampler per component, plus an
# alias-table sampler over the prior probabilities for component selection.
immutable MixtureSampler{VF,VS,Sampler} <: Sampleable{VF,VS}
    csamplers::Vector{Sampler}
    psampler::AliasTable
end

function MixtureSampler{VF,VS}(d::MixtureModel{VF,VS})
    csamplers = map(sampler, d.components)
    psampler = sampler(d.prior)
    MixtureSampler{VF,VS,eltype(csamplers)}(csamplers, psampler)
end

# Draw: pick a component via the prior sampler, then sample from it.
rand(s::MixtureSampler) = rand(s.csamplers[rand(s.psampler)])
# BUG FIX: the original indexed `s.csampler`, a field that does not exist;
# the struct field is `csamplers`.
_rand!(s::MixtureSampler, x::DenseVector) = _rand!(s.csamplers[rand(s.psampler)], x)

sampler(d::MixtureModel) = MixtureSampler(d)
# Smoke test for a two-component discrete mixture (Poisson + Binomial).
using Distributions
using Base.Test

mix = MixtureModel([Poisson(2.0), Binomial(10, 0.3)],
                   [0.4, 0.6])

# pdf and logpdf must agree on a batch of draws.
xs = rand(mix, (100,))
@test_approx_eq exp(logpdf(mix,xs)) pdf(mix,xs)

# Type-level traits of the mixture model.
@test Distributions.variate_form(typeof(mix))=== Univariate
@test Distributions.value_support(typeof(mix))=== Discrete
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment