Ce serveur Gitlab sera éteint le 30 juin 2020, pensez à migrer vos projets vers les serveurs gitlab-research.centralesupelec.fr et gitlab-student.centralesupelec.fr !

Commit 1fce06f6 authored by Simon Byrne's avatar Simon Byrne

Kolmogorov distribution and KS tests

parent 6b378855
......@@ -44,6 +44,7 @@ export # types
# InverseLink,
InverseWishart,
InvertedGamma,
Kolmogorov,
Laplace,
Levy,
# Link,
......@@ -183,6 +184,7 @@ include(joinpath("univariate", "geometric.jl"))
include(joinpath("univariate", "gumbel.jl"))
include(joinpath("univariate", "hypergeometric.jl"))
include(joinpath("univariate", "invertedgamma.jl"))
include(joinpath("univariate", "kolmogorov.jl"))
include(joinpath("univariate", "laplace.jl"))
include(joinpath("univariate", "levy.jl"))
include(joinpath("univariate", "logistic.jl"))
......
# Kolmogorov distribution
# defined as the sup_{t \in [0,1]} |B(t)|, where B(t) is a Brownian bridge
# used in the Kolmogorov--Smirnov test for large n.
immutable Kolmogorov <: ContinuousUnivariateDistribution
end
insupport(::Kolmogorov, x::Real) = zero(x) <= x < Inf
insupport(::Type{Kolmogorov}, x::Real) = zero(x) <= x < Inf
mean(d::Kolmogorov) = sqrt(pi/2.0) * log(2.0)
var(d::Kolmogorov) = pi^2/12.0 - pi/2.0*log(2.0)^2
# TODO: higher-order moments also exist, can be obtained by differentiating series
# cdf and ccdf are based on series truncation.
# two different series are available, e.g. see:
# N. Smirnov, "Table for Estimating the Goodness of Fit of Empirical Distributions",
# The Annals of Mathematical Statistics , Vol. 19, No. 2 (Jun., 1948), pp. 279-281
# http://projecteuclid.org/euclid.aoms/1177730256
# use one series for small x, one for large x
# 5 terms seems to be sufficient for Float64 accuracy
# some divergence from Smirnov's table in 6th decimal near 1.0 (e.g. 1.04): occurs in
# both series so assume error in table.
function cdf(d::Kolmogorov,x::Real)
if x > 1.0
return 1.0-ccdf(d,x)
end
s = 0.0
for i = 1:5
s += exp(-((2*i-1)*π/x)^2/8.0)
end
√2π*s/x
end
function ccdf(d::Kolmogorov,x::Real)
if x <= 1.0
return 1.0-cdf(d,x)
end
s = 0.0
for i = 1:5
s += (iseven(i) ? -1 : 1)*exp(-2.0*(i*x)^2)
end
2.0*s
end
......@@ -152,6 +152,23 @@ for d in [Arcsine(),
@check_deviation "median" m m_hat
end
# Kolmogorov-Smirnov test
if isa(d, ContinuousDistribution)
c = cdf(d,x)
sort!(c)
for i = 1:n_samples
c[i] = c[i]*n_samples - i
end
a = max(abs(c))
for i = 1:n_samples
c[i] += 1.0
end
b = max(abs(c))
ks = max(a,b)
kss = ks/n_samples
ksp = ccdf(Kolmogorov(),ks/sqrt(n_samples))
@printf " KS statistic = %10.3e, p-value = %8.6f \n" kss ksp
end
println()
end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment