Distributions.jl 10.9 KB
Newer Older
1 2
module Distributions

3
using NumericExtensions
4
using StatsBase
John Myles White's avatar
John Myles White committed
5

6
export
Dahua Lin's avatar
Dahua Lin committed
7 8 9 10 11 12 13 14
    # types
    VariateForm,
    ValueSupport,
    Univariate,
    Multivariate,
    Matrixvariate,
    Discrete,
    Continuous,
15
    Distribution,
16 17
    UnivariateDistribution,
    MultivariateDistribution,
Dan Merl's avatar
Dan Merl committed
18 19
    MatrixDistribution,
    NonMatrixDistribution,
20 21
    DiscreteDistribution,
    ContinuousDistribution,
22 23
    DiscreteUnivariateDistribution,
    DiscreteMultivariateDistribution,
Dan Merl's avatar
Dan Merl committed
24
    DiscreteMatrixDistribution,
25 26
    ContinuousUnivariateDistribution,
    ContinuousMultivariateDistribution,
Dan Merl's avatar
Dan Merl committed
27
    ContinuousMatrixDistribution,
Dahua Lin's avatar
Dahua Lin committed
28
    SufficientStats,
John Myles White's avatar
John Myles White committed
29
    Arcsine,
30 31
    Bernoulli,
    Beta,
32
    BetaPrime,
33 34 35
    Binomial,
    Categorical,
    Cauchy,
Dahua Lin's avatar
Dahua Lin committed
36
    Chi,
37
    Chisq,
Dahua Lin's avatar
Dahua Lin committed
38
    Cosine,
39
    DiagNormal,
Dahua Lin's avatar
Dahua Lin committed
40
    DiagNormalCanon,
41
    Dirichlet,
42
    DiscreteUniform,
Martin O'Leary's avatar
Martin O'Leary committed
43
    DoubleExponential,
44 45 46
    EdgeworthMean,
    EdgeworthSum,
    EdgeworthZ,
47
    EmpiricalUnivariateDistribution,
48
    Erlang,
49 50 51
    Exponential,
    FDist,
    Gamma,
52 53
    GenericMvNormal,
    GenericMvNormalCanon,
54
    Geometric,
Martin O'Leary's avatar
Martin O'Leary committed
55
    Gumbel,
56
    Hypergeometric,
57
    InverseWishart,
58
    InverseGamma,
59
    InverseGaussian,  
Dahua Lin's avatar
Dahua Lin committed
60 61
    IsoNormal,
    IsoNormalCanon,  
62
    Kolmogorov,
63 64
    KSDist,
    KSOneSided,
John Myles White's avatar
John Myles White committed
65
    Laplace,
66
    Levy,
67
    Logistic,
68
    LogNormal,
69
    MixtureModel,
70
    Multinomial,
71
    MultivariateNormal,
72
    MvNormal,
Dahua Lin's avatar
Dahua Lin committed
73
    MvNormalCanon,
74
    MvNormalKnownSigma,
75
    MvTDist,
76 77 78 79 80 81
    NegativeBinomial,
    NoncentralBeta,
    NoncentralChisq,
    NoncentralF,
    NoncentralT,
    Normal,
82
    NormalCanon,
83 84 85 86
    NormalGamma,
    NormalInverseGamma,
    NormalInverseWishart,
    NormalWishart,
87
    Pareto,
88
    Poisson,
89
    Rayleigh,
90
    Skellam,
91
    TDist,
92
    TriangularDist,
93
    Truncated,
94
    Uniform,
95
    VonMisesFisher,
96
    Weibull,
97
    Wishart,
98
    QQPair,
Dahua Lin's avatar
Dahua Lin committed
99 100

    # methods
101
    binaryentropy, # entropy of distribution in bits
102
    canonform,     # get canonical form of a distribution
103 104
    ccdf,          # complementary cdf, i.e. 1 - cdf
    cdf,           # cumulative distribution function
105 106
    cf,            # characteristic function
    cgf,           # cumulant generating function
107
    cquantile,     # complementary quantile (i.e. using prob in right hand tail)
108
    cumulant,      # cumulants of distribution
109
    complete,      # turn an incomplete formulation into a complete distribution
110
    dim,           # sample dimension of multivariate distribution
111
    entropy,       # entropy of distribution in nats
112 113
    fit,           # fit a distribution to data (using default method)
    fit_mle,       # fit a distribution to data using MLE
Dahua Lin's avatar
Dahua Lin committed
114
    fit_mle!,      # fit a distribution to data using MLE (inplace update to initial guess)
115
    fit_map,       # fit a distribution to data using MAP
116
    freecumulant,  # free cumulants of distribution
117
    gmvnormal,     # a generic function to construct multivariate normal distributions
118
    insupport,     # predicate, is x in the support of the distribution?
Dahua Lin's avatar
Dahua Lin committed
119
    invcov,        # get the inverse covariance
120 121
    invlogccdf,    # complementary quantile based on log probability
    invlogcdf,     # quantile based on log probability
122 123 124
    # NOTE(review): in standard usage platykurtic means excess kurtosis < 0.0 and
    # leptokurtic means excess kurtosis > 0.0; the comments here previously had the
    # two signs swapped -- confirm the implementations follow the standard meaning.
    isplatykurtic, # Is excess kurtosis < 0.0?
    isleptokurtic, # Is excess kurtosis > 0.0?
    ismesokurtic,  # Is excess kurtosis = 0.0?
125
    isprobvec,     # Is a probability vector?
126 127 128 129
    # predicates on the support of a distribution
    # (definitions are not in this file; presumably in the included sources -- confirm)
    isupperbounded,
    islowerbounded,
    isbounded,     # fixed spelling: was `isbouned`, so the intended name was never exported
    hasfinitesupport,
130
    kde,           # Kernel density estimator (from StatsBase.jl)
131 132 133
    kurtosis,      # kurtosis of the distribution
    logccdf,       # ccdf returning log-probability
    logcdf,        # cdf returning log-probability
134
    loglikelihood, # log probability of array of IID draws
135
    logpdf,        # log probability density
136
    logpdf!,       # evaluate log pdf to provided storage
137
    logpmf,        # log probability mass
138
    logpmf!,       # evaluate log pmf to provided storage
139 140
    posterior,        # get posterior distribution given prior and observed data
    posterior_canon,  # get the canonical form of the posterior distribution
141 142 143
    posterior_mode,  # get the mode of posterior distribution
    posterior_rand,  # draw samples from the posterior distribution
    posterior_rand!, 
144
    posterior_randmodel,
145 146
    scale,         # scale parameter of a distribution
    rate,          # rate parameter of a distribution
147 148
    sqmahal,       # squared Mahalanobis distance to Gaussian center
    sqmahal!,      # inplace evaluation of sqmahal
149 150
    mean,          # mean of distribution
    median,        # median of distribution
151
    mgf,           # moment generating function
152
    mode,          # the mode of a unimodal distribution
153
    modes,         # mode(s) of distribution as vector
154
    moment,        # moments of distribution
155
    nsamples,      # get the number of samples in a data array based on distribution types
156 157 158
    pdf,           # probability density function (ContinuousDistribution)
    pmf,           # probability mass function (DiscreteDistribution)
    quantile,      # inverse of cdf (defined for p in (0,1))
159
    qqbuild,       # build a paired quantiles data structure for qqplots
160 161
    rand,          # random sampler
    rand!,         # replacement random sampler
162
    sample,        # sample from a source array
163
    sampler,       # create a Sampler object for efficient samples
164
    skewness,      # skewness of the distribution
165
    sprand,        # random sampler for sparse matrices
166
    std,           # standard deviation of distribution
Dahua Lin's avatar
Dahua Lin committed
167 168
    suffstats,     # compute sufficient statistics
    var,           # variance of distribution
169
    wsample,       # weighted sampling from a source array
170
    expected_logdet # expected logarithm of random matrix determinant
171

172 173 174 175 176
import Base.Random
import Base: show, scale, sum!, rand, rand!, sprand
import Base: mean, median, maximum, minimum, quantile, std, var, cov, cor
import NumericExtensions: dim, entropy
import StatsBase: kurtosis, skewness, mode, modes, randi, RandIntSampler
177

178

Dahua Lin's avatar
Dahua Lin committed
179 180 181 182 183
#### Distribution type system

# Abstract tag type that bounds the second type parameter `S` of
# `Distribution{F,S}`. `Discrete` and `Continuous` are its two field-less
# concrete subtypes; they carry no data and exist only to be used as type
# parameters.
abstract ValueSupport
type Discrete <: ValueSupport end
type Continuous <: ValueSupport end
184

Dahua Lin's avatar
Dahua Lin committed
185 186 187 188
# Abstract tag type that bounds the first type parameter `F` of
# `Distribution{F,S}`. `Univariate`, `Multivariate` and `Matrixvariate` are its
# three field-less concrete subtypes; they carry no data and exist only to be
# used as type parameters.
abstract VariateForm
type Univariate <: VariateForm end
type Multivariate <: VariateForm end
type Matrixvariate <: VariateForm end
189

Dahua Lin's avatar
Dahua Lin committed
190
# Root abstract type of the distribution hierarchy, parameterized by a variate
# form `F` (a subtype of `VariateForm`) and a value support `S` (a subtype of
# `ValueSupport`). The type aliases declared after it fix one or both parameters.
abstract Distribution{F<:VariateForm,S<:ValueSupport}
191

Dahua Lin's avatar
Dahua Lin committed
192 193 194
# Aliases that fix the variate form `F` of `Distribution{F,S}` and leave the
# value support `S` as a free parameter.
typealias UnivariateDistribution{S<:ValueSupport}   Distribution{Univariate,S}
typealias MultivariateDistribution{S<:ValueSupport} Distribution{Multivariate,S}
typealias MatrixDistribution{S<:ValueSupport}       Distribution{Matrixvariate,S}
195
# Union of the univariate and multivariate aliases, i.e. every `Distribution`
# whose variate form is `Univariate` or `Multivariate` (any value support).
typealias NonMatrixDistribution Union(UnivariateDistribution, MultivariateDistribution)
Dahua Lin's avatar
Dahua Lin committed
196 197 198 199 200 201 202 203 204 205

# Aliases that fix the value support `S` of `Distribution{F,S}` and leave the
# variate form `F` as a free parameter.
typealias DiscreteDistribution{F<:VariateForm}   Distribution{F,Discrete}
typealias ContinuousDistribution{F<:VariateForm} Distribution{F,Continuous}

# Fully specified aliases: one for each combination of the three variate forms
# and the two value supports.
typealias DiscreteUnivariateDistribution     Distribution{Univariate,    Discrete}
typealias ContinuousUnivariateDistribution   Distribution{Univariate,    Continuous}
typealias DiscreteMultivariateDistribution   Distribution{Multivariate,  Discrete}
typealias ContinuousMultivariateDistribution Distribution{Multivariate,  Continuous}
typealias DiscreteMatrixDistribution         Distribution{Matrixvariate, Discrete}
typealias ContinuousMatrixDistribution       Distribution{Matrixvariate, Continuous}
206

Dahua Lin's avatar
Dahua Lin committed
207
# Abstract supertype with no subtypes declared in this file.
# NOTE(review): presumably the common supertype of the values produced by the
# exported `suffstats` function -- confirm against the included sources.
abstract SufficientStats
208 209 210 211
# Abstract supertype with no subtypes declared in this file.
# NOTE(review): presumably the input accepted by the exported `complete`
# function ("turn an incomplete formulation into a complete distribution")
# -- confirm against the included sources.
abstract IncompleteDistribution

# `DistributionType{D}` is the type object `Type{D}` of a distribution type `D`,
# so it matches a distribution *type* passed as a value rather than an instance.
typealias DistributionType{D<:Distribution} Type{D}
# Either a distribution type object or an `IncompleteDistribution` instance.
typealias IncompleteFormulation Union(DistributionType,IncompleteDistribution)
Dahua Lin's avatar
Dahua Lin committed
212

Dahua Lin's avatar
Dahua Lin committed
213 214 215

#### Include files

Dahua Lin's avatar
Dahua Lin committed
216 217
include("constants.jl")

218 219
include("fallbacks.jl")
include("rmath.jl")
220
include("specialfuns.jl")
221
include("tvpack.jl")
222
include("utils.jl")
223

224
include(joinpath("samplers", "categorical_samplers.jl"))
Dahua Lin's avatar
Dahua Lin committed
225
include(joinpath("samplers", "gamma_sampler.jl"))
226

227 228 229 230 231 232 233 234 235 236
# Univariate distributions
include(joinpath("univariate", "arcsine.jl"))
include(joinpath("univariate", "bernoulli.jl"))
include(joinpath("univariate", "beta.jl"))
include(joinpath("univariate", "betaprime.jl"))
include(joinpath("univariate", "binomial.jl"))
include(joinpath("univariate", "categorical.jl"))
include(joinpath("univariate", "cauchy.jl"))
include(joinpath("univariate", "chi.jl"))
include(joinpath("univariate", "chisq.jl"))
237
include(joinpath("univariate", "cosine.jl"))
238
include(joinpath("univariate", "discreteuniform.jl"))
239
include(joinpath("univariate", "empirical.jl"))
240 241 242
include(joinpath("univariate", "exponential.jl"))
include(joinpath("univariate", "fdist.jl"))
include(joinpath("univariate", "gamma.jl"))
243
include(joinpath("univariate", "edgeworth.jl"))
244
include(joinpath("univariate", "erlang.jl"))
245 246 247
include(joinpath("univariate", "geometric.jl"))
include(joinpath("univariate", "gumbel.jl"))
include(joinpath("univariate", "hypergeometric.jl"))
248
include(joinpath("univariate", "inversegamma.jl"))
249
include(joinpath("univariate", "inversegaussian.jl"))
250
include(joinpath("univariate", "kolmogorov.jl"))
251 252
include(joinpath("univariate", "ksdist.jl"))
include(joinpath("univariate", "ksonesided.jl"))
253
include(joinpath("univariate", "laplace.jl"))
254
include(joinpath("univariate", "levy.jl"))
255 256 257 258 259 260 261 262
include(joinpath("univariate", "logistic.jl"))
include(joinpath("univariate", "lognormal.jl"))
include(joinpath("univariate", "negativebinomial.jl"))
include(joinpath("univariate", "noncentralbeta.jl"))
include(joinpath("univariate", "noncentralchisq.jl"))
include(joinpath("univariate", "noncentralf.jl"))
include(joinpath("univariate", "noncentralt.jl"))
include(joinpath("univariate", "normal.jl"))
263
include(joinpath("univariate", "normalcanon.jl"))
264 265 266
include(joinpath("univariate", "pareto.jl"))
include(joinpath("univariate", "poisson.jl"))
include(joinpath("univariate", "rayleigh.jl"))
267
include(joinpath("univariate", "skellam.jl"))
268 269 270 271 272 273 274 275
include(joinpath("univariate", "tdist.jl"))
include(joinpath("univariate", "triangular.jl"))
include(joinpath("univariate", "uniform.jl"))
include(joinpath("univariate", "weibull.jl"))

# Multivariate distributions
include(joinpath("multivariate", "dirichlet.jl"))
include(joinpath("multivariate", "multinomial.jl"))
276 277
include(joinpath("multivariate", "mvnormal.jl"))
include(joinpath("multivariate", "mvnormalcanon.jl"))
278
include(joinpath("multivariate", "mvtdist.jl"))
279
include(joinpath("multivariate", "vonmisesfisher.jl"))
280 281 282 283 284 285 286 287

# Matrix distributions
include(joinpath("matrix", "inversewishart.jl"))
include(joinpath("matrix", "wishart.jl"))

# Truncated distributions
include("truncate.jl")
include(joinpath("univariate", "truncated", "normal.jl"))
288

289 290
# Mixture distributions
include("mixturemodel.jl")
291

292
# REPL representations
293
include("show.jl")
294

295
# Kernel density estimators
296
# include("kde.jl")  ## migrated to StatsBase.jl
297

298 299
# Expectations, entropy, KL divergence
include("functionals.jl")
300

301
# Posteriors and conjugate priors
302 303 304 305 306 307 308 309 310 311 312 313 314
include(joinpath("conjugates", "fallbacks.jl"))
include(joinpath("conjugates", "beta_binom.jl"))
include(joinpath("conjugates", "dirichlet_multi.jl"))
include(joinpath("conjugates", "gamma_exp.jl"))

include(joinpath("conjugates", "normalgamma.jl"))
include(joinpath("conjugates", "normalinversegamma.jl"))
include(joinpath("conjugates", "normalwishart.jl"))
include(joinpath("conjugates", "normalinversewishart.jl"))
include(joinpath("conjugates", "normal.jl"))
include(joinpath("conjugates", "mvnormal.jl"))

# other stuff
315
include("qq.jl")
Dahua Lin's avatar
Dahua Lin committed
316 317
include("estimators.jl")

318
end # module