Ce serveur Gitlab sera éteint le 30 juin 2020, pensez à migrer vos projets vers les serveurs gitlab-research.centralesupelec.fr et gitlab-student.centralesupelec.fr !

Commit ec786a51 by Simon Byrne

### added KSDist and KSOneSided, implemented basic algorithms.

parent 71e9cfa1
 ... ... @@ -46,6 +46,8 @@ export # types InverseWishart, InvertedGamma, Kolmogorov, KSDist, KSOneSided, Laplace, Levy, # Link, ... ... @@ -195,6 +197,8 @@ include(joinpath("univariate", "gumbel.jl")) include(joinpath("univariate", "hypergeometric.jl")) include(joinpath("univariate", "invertedgamma.jl")) include(joinpath("univariate", "kolmogorov.jl")) include(joinpath("univariate", "ksdist.jl")) include(joinpath("univariate", "ksonesided.jl")) include(joinpath("univariate", "laplace.jl")) include(joinpath("univariate", "levy.jl")) include(joinpath("univariate", "logistic.jl")) ... ...
 # Kolmogorov distribution # defined as the sup_{t \in [0,1]} |B(t)|, where B(t) is a Brownian bridge # used in the Kolmogorov--Smirnov test for large n. immutable Kolmogorov <: ContinuousUnivariateDistribution end ... ... @@ -44,3 +45,25 @@ function ccdf(d::Kolmogorov,x::Real) end 2.0*s end # TODO: figure out how best to truncate series function pdf(d::Kolmogorov,x::Real) if x <= 0.0 return 0.0 elseif x <= 1.0 c = π/(2.0*x) s = 0.0 for i = 1:20 k = ((2*i-1)*c)^2 s += (k-1.0)*exp(-k/2.0) end return √2π*s/x^2 else s = 0.0 for i = 1:20 s += (iseven(i) ? -1 : 1)*i^2*exp(-2.0*(i*x)^2) end return 8.0*x*s end end
 # Distribution of the (two-sided) Kolmogorov-Smirnoff statistic # D_n = \sup_x |\hat{F}_n(x) -F(x)| # sqrt(n) D_n converges a.s. to the Kolmogorov distribution. immutable KSDist <: ContinuousUnivariateDistribution n::Int end insupport(d::KSDist, x::Real) = 1/(2*d.n) <= x <= 1.0 # Simard and L'Ecuyer (2011) meta-algorithm function cdf(d::KSDist,x::Real) n = d.n b = x*n # known exact values if b <= 0.5 return 0.0 elseif b <= 1.0 # could be improved return exp(lfact(n)+n*(log(2.0*b-1.0)-log(n))) elseif x >= 1.0 return 1.0 elseif b >= n-1 return 1.0 - 2.0*(1.0-x)^n end a = b*x if a >= 18.0 return 1.0 elseif n <= 140 if a <= 0.754693 return durbin(d,x) elseif a <= 4.0 return pomeranz(d,x) else # Miller (1956) approximation return 1.0 - 2.0*ccdf(KSOneSided(d.n),x) end elseif n <= 10_000 if b*sqrt(n) <= 1.4 return durbin(d,x) end end pelzgood(d,x) end # Simard and L'Ecuyer (2011) meta-algorithm function ccdf(d::KSDist,x::Real) n = d.n b = x*n # Ruben and Gambino (1982) known exact values if b <= 0.5 return 1.0 elseif b <= 1.0 return 1.0-exp(lfact(n)+n*(log(2.0*b-1.0)-log(n))) elseif x >= 1.0 return 0.0 elseif b >= n-1 return 2.0*(1.0-x)^n end a = b*x if a >= 370.0 return 0.0 elseif a >= 4.0 || (n > 140 && a >= 2.2) # Miller (1956) approximation return 2.0*ccdf(KSOneSided(d.n),x) end 1.0-cdf(d,x) end # Durbin matrix CDF method, based on Marsaglia, Tsang and Wang (2003) # modified to avoid need for exponent tracking function durbin(d::KSDist,x::Float64) # a = x*x*n # if (a > 7.24) || (a > 3.76) && (n > 99)) # return 1 - 2.0*exp(-(2.000071+.331/sqrt(n)+1.409/n)*a) # end n = d.n k = iceil(n*x) h = k-n*x m = 2*k-1 H = Array(Float64,m,m) for i = 1:m, j = 1:m H[i,j] = i-j+1 >= 0 ? 1.0 : 0.0 end for i = 1:m H[i,1] -= h^i H[m,i] -= h^(m-i+1) end H[m,1] += h > 0.5 ? 
(2*h-1) : 0.0 for i = 1:m, j = 1:m # we can avoid keeping track of the exponent by dividing by e # (from Stirling's approximation) H[i,j] /= e for g = 1:max(i-j+1,0) H[i,j] /= g end end Q = H^n s = Q[k,k] # need to multiply by n!*(e/n)^n if n < 500 for i = 1:n s *= i/n*e end else # 3rd-order Stirling's approximation more accurate for large n twn = 12.0*n s*sqrt(2.0*pi*n)*(1.0 + twn\(1 + (2.0*twn)\(1 - (15.0*twn)\139.0))) end s end # CDF method of Pelz and Good (1976) # based on asymptotic series, accurate for large n function pelzgood(d,x) n = d.n sqrtn = sqrt(n) z = x/sqrtn K0 = cdf(Kolmogorov(),z) K1 = x K0 + K1/sqrtn + K2/n + K3/(n*sqrtn) end function pomeranz(d,x) n = d.n t = n*x t = big(x)*n end \ No newline at end of file
# Distribution of the (two-sided) Kolmogorov-Smirnov statistic
# D_n = \sup_x |\hat{F}_n(x) -F(x)|
# where \hat{F}_n is the empirical CDF of an n-point sample from F.
# sqrt(n) D_n converges a.s. to the Kolmogorov distribution.
immutable KSDist <: ContinuousUnivariateDistribution
    n::Int  # sample size of the empirical CDF
end

# D_n is at least 1/(2n) and at most 1.
insupport(d::KSDist, x::Real) = 1/(2*d.n) <= x <= 1.0

# CDF of D_n: exact closed forms on the edges of the support, Durbin's matrix
# method or Miller's approximation for moderate n, Kolmogorov limit for huge n.
# TODO: implement Simard and L'Ecuyer (2011) meta-algorithm
# requires Pomeranz and Pelz-Good algorithms
function cdf(d::KSDist,x::Real)
    n = d.n
    b = x*n
    # known exact values
    if b <= 0.5
        # below the lower support bound
        return 0.0
    elseif b <= 1.0
        # exact formula n!*(2b-1)^n/n^n, evaluated in log space
        # accuracy could be improved
        return exp(lfact(n)+n*(log(2.0*b-1.0)-log(n)))
    elseif x >= 1.0
        return 1.0
    elseif b >= n-1
        return 1.0 - 2.0*(1.0-x)^n
    end
    a = b*x  # a = n*x^2 drives the algorithm selection
    if a >= 18.0
        # CDF is 1 to within double-precision roundoff
        return 1.0
    elseif n <= 10_000
        if a <= 4.0
            return cdf_durbin(d,x)
        else
            return 1.0 - ccdf_miller(d,x)
        end
    else
        # very large n: Kolmogorov asymptotic distribution, evaluated at
        # sqrt(a) = x*sqrt(n)
        return cdf(Kolmogorov(),sqrt(a))
    end
end

# Complementary CDF of D_n; computing the tail directly avoids cancellation
# in 1 - cdf for tiny tail probabilities.
function ccdf(d::KSDist,x::Real)
    n = d.n
    b = x*n
    # Ruben and Gambino (1982) known exact values
    if b <= 0.5
        return 1.0
    elseif b <= 1.0
        return 1.0-exp(lfact(n)+n*(log(2.0*b-1.0)-log(n)))
    elseif x >= 1.0
        return 0.0
    elseif b >= n-1
        return 2.0*(1.0-x)^n
    end
    a = b*x
    if a >= 370.0
        # tail underflows double precision
        return 0.0
    elseif a >= 4.0 || (n > 140 && a >= 2.2)
        return ccdf_miller(d,x)
    else
        return 1.0-cdf(d,x)
    end
end

# Durbin matrix CDF method, based on Marsaglia, Tsang and Wang (2003)
# modified to avoid need for exponent tracking: each entry of H is divided by
# e, and stirling(n) = n!*(e/n)^n restores the scale at the end.
function cdf_durbin(d::KSDist,x::Float64)
    n = d.n
    # k = ceil(n*x); h = k - n*x and ch = 1 - h are computed as exact
    # remainders in BigFloat to avoid cancellation
    k, ch, h = ceil_rems_mult(n,x)
    m = 2*k-1
    H = Array(Float64,m,m)
    # H starts as the lower-Hessenberg pattern of ones
    for i = 1:m, j = 1:m
        H[i,j] = i-j+1 >= 0 ? 1.0 : 0.0
    end
    # first column and last row become 1-h^i, built via the factored form
    r = 1.0
    for i = 1:m
        # (1-h^i) = (1-h)(1+h+...+h^(i-1))
        H[i,1] = H[m,m-i+1] = ch*r
        r += h^i
    end
    # corner correction; combined with the loop above this leaves
    # H[m,1] = 1 - 2h^m (+ extra term when h > 1/2).
    # NOTE(review): Marsaglia, Tsang & Wang (2003) add (2h-1)^m here; the
    # term (h-ch) equals 2h-1 (since ch = 1-h) but appears WITHOUT the ^m
    # exponent — confirm against the paper.
    H[m,1] += h <= 0.5 ? -h^m : -h^m+(h-ch)
    for i = 1:m, j = 1:m
        # divide each entry by (i-j+1)!
        for g = 1:max(i-j+1,0)
            H[i,j] /= g
        end
        # we can avoid keeping track of the exponent by dividing by e
        # (from Stirling's approximation)
        H[i,j] /= e
    end
    Q = H^n
    s = Q[k,k]
    # undo the per-entry division by e: multiply by n!*(e/n)^n
    s*stirling(n)
end

# Miller (1956) approximation: the two-sided tail is approximately twice the
# one-sided tail.
function ccdf_miller(d::KSDist, x::Real)
    2.0*ccdf(KSOneSided(d.n),x)
end

## these functions are used in durbin and pomeranz algorithms
# calculate exact remainders the easy way

# Return (floor(n*x), n*x - floor(n*x), ceil-side remainder), computed in
# BigFloat so the remainders are exact, then converted back to the argument
# types.  (Unused here; presumably intended for the planned Pomeranz
# algorithm — see the comment above.)
function floor_rems_mult(n,x)
    t = big(x)*big(n)
    fl = floor(t)
    lrem = t - fl
    urem = (fl+one(fl)) - t
    return convert(typeof(n),fl), convert(typeof(x),lrem), convert(typeof(x),urem)
end

# Return (ceil(n*x), 1 - h, h) where h = ceil(n*x) - n*x, computed in
# BigFloat so the remainders are exact, then converted back to the argument
# types.
function ceil_rems_mult(n,x)
    t = big(x)*big(n)
    cl = ceil(t)
    lrem = t - (cl - one(cl))
    urem = cl - t
    return convert(typeof(n),cl), convert(typeof(x),lrem), convert(typeof(x),urem)
end

# n!*(e/n)^n
# Exact running product for small n; 3rd-order Stirling series otherwise.
function stirling(n)
    if n < 500
        s = 1.0
        for i = 1:n
            s *= i/n*e
        end
        return s
    else
        # 3rd-order Stirling's approximation more accurate for large n
        twn = 12.0*n
        return sqrt(2.0*pi*n)*(1.0 + twn\(1 + (2.0*twn)\(1 - (15.0*twn)\139.0)))
    end
end
 # Distribution of the one-sided Kolmogorov-Smirnov test statistic: # D^+_n = \sup_x (\hat{F}_n(x) -F(x)) immutable KSOneSided <: ContinuousUnivariateDistribution n::Int end insupport(d::KSOneSided, x::Real) = 0.0 <= x <= 1.0 # formula of Birnbaum and Tingey (1951) function ccdf(d::KSOneSided,x::Real) if x >= 1.0 return 0.0 elseif x <= 0.0 return 1.0 end n = d.n s = 0.0 for j = 0:ifloor(n-n*x) p = x+j/n s += pdf(Binomial(n,p),j) / p end s*x end cdf(d::KSOneSided,x::Real) = 1.0 - ccdf(d,x)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!