###Description
##
###Computes the probability than an n-dimensional (n>1) multivariate
###normal distribution lies inside a rectangles of R^n. Suited for large
###probabilities. In particular, it can be used to compute p-values of
###tests.
##
###Usage
##
###msvdnorm(lower=Null,upper=Null,Sigma=Null,Nsample=10000,Ncov=1)
##
###Arguments
##
###lower the vector of lower limits of length n.
###upper the vector of upper limits of length n.
###Sigma the covariance matrix of dimension n.
###Nsample the sample size.
###Ncov the number of control variates to be used (<=n).
##
##
###Details
##
###This program involves the computation of multivariate normal
###probabilities with arbitrary covariance matrices. The methodology is
###described in Phinikettos and Gandy (2010), Computational Statistics &
###Data Analysis.
##
###Value
##
###The evaluated probability is returned.
##
###Examples
##
## # source("psvdnorm.R")
## #lower <- rep(-5, 10)
## #upper <- rep(5, 10)
## #a <- matrix(rnorm(100),10,10)
## #Sigma <- a %*% t(a)
## #psvdnorm(lower,upper,Sigma,Nsample=10000,Ncov=5)
##
###Disclaimer
##
###This is experimental code - use at your own risk. There is no
###guarantee that it is stable for all possible inputs. The code is
###designed to work for high-dimensional cases and for cases with
###high results (close to 1).
##
##
##
###THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
###"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
###LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
###A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
###OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
###SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
###LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
###DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
###THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
###(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
###OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE
########################################################################################
########################################################################################
########### Importance Sampling Section 2.2 ############################################
########################################################################################
SVD_cond_IS_fast <- function(initialsteps,maxsteps,var,U,d,Lower,Upper) {
lower <- Lower + (Upper-Lower)* (U[,1]<0)
upper <- Upper + (Lower-Upper)* (U[,1]<0)
nIS=length(d)-1
Ucond <- U[,-1,drop=FALSE]
dcond <- d[-1]
p <- vector("numeric")
S <- vector("numeric")
slong <- vector("numeric")
sumIS <- vector("numeric")
S[1] <- var
while(length(slong)<maxsteps){
i <- length(S)
n <- max(initialsteps,length(slong))
nsamples <- min(n,maxsteps-length(slong))
if (nsamples<0) break
Zcond <- matrix(rnorm(nsamples*nIS,0,sqrt(S[i])),nIS,nsamples)
h <- Ucond %*% (dcond*Zcond)
a <- (lower - h)/(d[1]*U[,1])
b <- (upper - h)/(d[1]*U[,1])
sumISS <- colSums(Zcond^2)
w <-exp(-1/2*sumISS*(1-1/S[i]) ) * (sqrt(S[i]))^nIS
maxa <- apply(a,2,max)
minb <- apply(b,2,min)
phi <- ((pnorm(minb)-pnorm(maxa))*(maxa < minb))
P <- phi*w
slong <- c(slong,rep(1,nsamples))
p <- c(p,P)
sumIS <- c(sumIS,sumISS)
if (initialsteps < maxsteps) {
z <- function(s0){
s <- s0[1]
if (s<=0) return(Inf);
W <- exp( -1/2*sumIS*(1-1/s) + nIS/2*log(s) )
res <- mean(W*p)
res
}
optimisation <- optimize(z,c(0,5),tol=0.00001)
S[i+1] <- optimisation$min
if ( all( abs(p-p[1])<1e-15 ) ) S[i+1]=S[i]
}
}
list(p=mean(p),sd=sd(p)/sqrt(length(p)),S=S[length(S)])
}
###########################################################################
SVD_cond_IS_fast2 <- function(initialsteps,maxsteps,var,U,d,Lower,Upper) {
lower <- Lower + (Upper-Lower)* (U[,1]<0)
upper <- Upper + (Lower-Upper)* (U[,1]<0)
nIS=length(d)-1
Ucond <- U[,-1,drop=FALSE]
dcond <- d[-1]
p <- vector("numeric")
S <- vector("numeric")
slong <- vector("numeric")
sumIS <- vector("numeric")
S[1] <- var
while(length(slong)<maxsteps){
i <- length(S)
n <- max(initialsteps,length(slong))
nsamples <- min(n,maxsteps-length(slong))
if (nsamples<0) break
Zcond <- matrix(rnorm(nsamples*nIS,0,sqrt(S[i])),nIS,nsamples)
h <- Ucond %*% (dcond*Zcond)
a <- (lower - h)/(d[1]*U[,1])
b <- (upper - h)/(d[1]*U[,1])
sumISS <- colSums(Zcond^2)
w <-exp(-1/2*sumISS*(1-1/S[i]) ) * (sqrt(S[i]))^nIS
maxa <- apply(a,2,max)
minb <- apply(b,2,min)
phi <- (1-(pnorm(minb)-pnorm(maxa))*(maxa < minb))
P <- phi*w
slong <- c(slong,rep(1,nsamples))
p <- c(p,P)
sumIS <- c(sumIS,sumISS)
if (initialsteps < maxsteps) {
z <- function(s0){
s <- s0[1]
if (s<=0) return(Inf);
W <- exp( -1/2*sumIS*(1-1/s) + nIS/2*log(s) )
res <- mean(W*p)
res
}
optimisation <- optimize(z,c(0,5),tol=0.00001)
S[i+1] <- optimisation$min
if ( all( abs(p-p[1])<1e-15 ) ) S[i+1]=S[i]
}
}
list(p=mean(p),sd=sd(p)/sqrt(length(p)),S=S[length(S)])
}
################################################################################
########### Optimal splitting parameters Section 2.3 ###########################
################################################################################
SVD_cond_split_optimal_few <- function(R,U,d,var,Lower,Upper){
if (length(Lower) > 2 ){
lower <- Lower + (Upper-Lower)* (U[,1]<0)
upper <- Upper + (Lower-Upper)* (U[,1]<0)
Ucond <- U[,-1,drop=FALSE]
dcond <- d[-1]
n <- length(d)-1
cumk <- cumsum(dcond^2)
sumk <- sum(dcond^2)
nSplit <- which(cumk>0.85*sumk)[1]
if (nSplit > floor(n/2) & n >1 ) nSplit = floor(n/2)
Zcond <- matrix(rnorm(R*n,0,sqrt(var)),n,R)
Zcond <- matrix(Zcond,n,R*2)
hunsplit <- Ucond[,(nSplit+1):n,drop=FALSE] %*% (dcond[(nSplit+1):n]*Zcond[(nSplit+1):n,])
Zcond[1:nSplit,] <- rnorm(2*R*nSplit,0,sqrt(var))
hsplit <- Ucond[,1:nSplit,drop=FALSE] %*% (dcond[1:nSplit]*Zcond[1:nSplit,,drop=FALSE])
h <- hsplit + hunsplit
a <- (lower -h)/(d[1]*U[,1])
b <- (upper -h)/(d[1]*U[,1])
sumIS <- colSums(Zcond^2)
w <-exp(-1/2*sumIS*(1-1/var) ) * sqrt(var)^n
maxa <- apply(a,2,max)
minb <- apply(b,2,min)
P <- ((pnorm(minb)-pnorm(maxa))*(maxa <= minb))*w
A <- P[1:R]
B <- P[(R+1):(2*R)]
if (abs(sum(A-B))>0) rho <- abs(cor(A,B))
else rho <- 0.01
a1= n-nSplit
b1= nSplit
timeSplit = (a1)*( sqrt(b1/(a1)) + sqrt(rho) )^2
optS=sqrt(a1/(rho*b1))
list(timeSplit=nSplit,optimalS=optS)
}
else list(timeSplit=1,optimalS=1)
}
#######################################################################
SVD_cond_split_optimal_few2 <- function(R,U,d,var,Lower,Upper){
if (length(Lower) > 2 ){
lower <- Lower + (Upper-Lower)* (U[,1]<0)
upper <- Upper + (Lower-Upper)* (U[,1]<0)
Ucond <- U[,-1,drop=FALSE]
dcond <- d[-1]
n <- length(d)-1
cumk <- cumsum(dcond^2)
sumk <- sum(dcond^2)
nSplit <- which(cumk>0.85*sumk)[1]
if (nSplit > floor(n/2)& n >1) nSplit = floor(n/2)
Zcond <- matrix(rnorm(R*n,0,sqrt(var)),n,R)
Zcond <- matrix(Zcond,n,R*2)
hunsplit <- Ucond[,(nSplit+1):n,drop=FALSE] %*% (dcond[(nSplit+1):n]*Zcond[(nSplit+1):n,,drop=FALSE])
Zcond[1:nSplit,] <- rnorm(2*R*nSplit,0,sqrt(var))
hsplit <- Ucond[,1:nSplit,drop=FALSE] %*% (dcond[1:nSplit]*Zcond[1:nSplit,])
h <- hsplit + hunsplit
a <- (lower -h)/(d[1]*U[,1])
b <- (upper -h)/(d[1]*U[,1])
sumIS <- colSums(Zcond^2)
w <-exp(-1/2*sumIS*(1-1/var) ) * sqrt(var)^n
maxa <- apply(a,2,max)
minb <- apply(b,2,min)
P <- (1-(pnorm(minb)-pnorm(maxa))*(maxa <= minb))*w
A <- P[1:R]
B <- P[(R+1):(2*R)]
if (abs(sum(A-B))>0) rho <- abs(cor(A,B))
else rho <- 0.01
a1= n-nSplit
b1= nSplit
timeSplit = (a1)*( sqrt(b1/(a1)) + sqrt(rho) )^2
optS=sqrt(a1/(rho*b1))
list(timeSplit=nSplit,optimalS=optS)
}
else list(timeSplit=1,optimalS=1)
}
########################################################################
############ Splitting + control variates Sections 2.3-2.4 #############
########################################################################
SVD_cond_split_control_row <- function(ncov,nSplit,R,S,U,d,var,Lower,Upper){
n <- length(d)-1
lower <- Lower + (Upper-Lower)* (U[,1]<0)
upper <- Upper + (Lower-Upper)* (U[,1]<0)
sdcontrol <- vector("numeric")
pcontrol <- matrix(0,R*S,ncov)
pmeancontrol <- vector("numeric")
p <- vector("numeric")
Zsplit <- matrix(rnorm(R*nSplit,0,sqrt(var)),nSplit,R)
if (n>1) Zunsplit <- matrix(rnorm(R*(n-nSplit),0,sqrt(var)),n-nSplit,R)
Ucond <- U[,-1,drop=FALSE]
dcond <- d[-1]
hsplit <- Ucond[,1:nSplit,drop=FALSE] %*% (dcond[1:nSplit]*Zsplit)
hunsplit = 0
if (n>1) hunsplit <- Ucond[,(nSplit+1):n,drop=FALSE] %*% (dcond[(nSplit+1):n]*Zunsplit)
h <- hsplit + hunsplit
a <- (lower -h)/(d[1]*U[,1])
b <- (upper -h)/(d[1]*U[,1])
sumISsplit <- colSums(Zsplit^2)
sumISunsplit = 0
if (n>1) sumISunsplit <- colSums(Zunsplit^2)
sumIS <- sumISunsplit + sumISsplit
w <-exp(-1/2*sumIS*(1-1/var) ) * sqrt(var)^n
maxa <- apply(a,2,max)
minb <- apply(b,2,min)
argbs <- apply(b,2,order)[1,]
freq <- sapply(1:length(d),function(i) sum(argbs==i))
argb <- order(freq,decreasing=T)[1:ncov]
if (ncov>0) {
for (i in 1:ncov) sdcontrol[i] <- sqrt(sum( (U[argb[i],] * d)^2 ))
for (i in 1:ncov) pmeancontrol[i] <- (pnorm(Upper[argb[i]],0, sdcontrol[i])- pnorm(Lower[argb[i]],0, sdcontrol[i]))
}
P <- ((pnorm(minb)-pnorm(maxa))*(maxa <= minb))*w
p <- c(p,P)
if (ncov>0) {
for (i in 1:ncov) {
maxca <- (lower[argb[i]] - h[argb[i],])/(d[1]*U[argb[i],1])
mincb <- (upper[argb[i]] - h[argb[i],])/(d[1]*U[argb[i],1])
pcontrol[1:R,i] <- ((pnorm(mincb)-pnorm(maxca))*(maxca <= mincb))*w -pmeancontrol[i]
}
}
if (S >1){
for (i in 2:S){
Zsplit[1:nSplit,] <- rnorm(nSplit*R,0,sqrt(var))
hsplit <- Ucond[,1:nSplit,drop=FALSE] %*% (dcond[1:nSplit]*Zsplit)
h <- hsplit + hunsplit
a <- (lower -h)/(d[1]*U[,1])
b <- (upper -h)/(d[1]*U[,1])
sumISsplit <- colSums(Zsplit^2)
sumIS <- sumISunsplit + sumISsplit
w <-exp(-1/2*sumIS*(1-1/var) ) * sqrt(var)^n
maxa <- apply(a,2,max)
minb <- apply(b,2,min)
P <- ((pnorm(minb)-pnorm(maxa))*(maxa <= minb))*w
p <- c(p,P)
if (ncov>0) {
for (j in 1:ncov) {
maxca <- (lower[argb[j]] - h[argb[j],])/(d[1]*U[argb[j],1])
mincb <- (upper[argb[j]] - h[argb[j],])/(d[1]*U[argb[j],1])
pcontrol[((i-1)*R+1):(i*R),j] <- ((pnorm(mincb)-pnorm(maxca))*(maxca <= mincb))*w -pmeancontrol[j]
}
}
}
}
pc = p
if (ncov>0) {
m <- lm(p~pcontrol)
beta <- as.vector(m$coefficients)
pc <- beta[1]
}
list(p=p,pc=pc)
}
#################################################################
SVD_cond_split_control_row2 <- function(ncov,nSplit,R,S,U,d,var,Lower,Upper){
n <- length(d)-1
lower <- Lower + (Upper-Lower)* (U[,1]<0)
upper <- Upper + (Lower-Upper)* (U[,1]<0)
sdcontrol <- vector("numeric")
pcontrol <- matrix(0,R*S,ncov)
pmeancontrol <- vector("numeric")
p <- vector("numeric")
Zsplit <- matrix(rnorm(R*nSplit,0,sqrt(var)),nSplit,R)
if (n>1) Zunsplit <- matrix(rnorm(R*(n-nSplit),0,sqrt(var)),n-nSplit,R)
Ucond <- U[,-1,drop=FALSE]
dcond <- d[-1]
hsplit <- Ucond[,1:nSplit,drop=FALSE] %*% (dcond[1:nSplit]*Zsplit)
hunsplit = 0
if (n>1) hunsplit <- Ucond[,(nSplit+1):n,drop=FALSE] %*% (dcond[(nSplit+1):n]*Zunsplit)
h <- hsplit + hunsplit
a <- (lower -h)/(d[1]*U[,1])
b <- (upper -h)/(d[1]*U[,1])
sumISsplit <- colSums(Zsplit^2)
sumISunsplit = 0
if (n>1) sumISunsplit <- colSums(Zunsplit^2)
sumIS <- sumISunsplit + sumISsplit
w <-exp(-1/2*sumIS*(1-1/var) ) * sqrt(var)^n
maxa <- apply(a,2,max)
minb <- apply(b,2,min)
argbs <- apply(b,2,order)[1,]
freq <- sapply(1:length(d),function(i) sum(argbs==i))
argb <- order(freq,decreasing=T)[1:ncov]
if (ncov>0) {
for (i in 1:ncov) sdcontrol[i] <- sqrt(sum( (U[argb[i],] * d)^2 ))
for (i in 1:ncov) pmeancontrol[i] <- 1-(pnorm(Upper[argb[i]],0, sdcontrol[i])- pnorm(Lower[argb[i]],0, sdcontrol[i]))
}
P <- (1-(pnorm(minb)-pnorm(maxa))*(maxa <= minb))*w
p <- c(p,P)
if (ncov>0) {
for (i in 1:ncov) {
maxca <- (lower[argb[i]] - h[argb[i],])/(d[1]*U[argb[i],1])
mincb <- (upper[argb[i]] - h[argb[i],])/(d[1]*U[argb[i],1])
pcontrol[1:R,i] <- (1-(pnorm(mincb)-pnorm(maxca))*(maxca <= mincb))*w -pmeancontrol[i]
}
}
if (S >1){
for (i in 2:S){
Zsplit[1:nSplit,] <- rnorm(nSplit*R,0,sqrt(var))
hsplit <- Ucond[,1:nSplit,drop=FALSE] %*% (dcond[1:nSplit]*Zsplit)
h <- hsplit + hunsplit
a <- (lower -h)/(d[1]*U[,1])
b <- (upper -h)/(d[1]*U[,1])
sumISsplit <- colSums(Zsplit^2)
sumIS <- sumISunsplit + sumISsplit
w <-exp(-1/2*sumIS*(1-1/var) ) * sqrt(var)^n
maxa <- apply(a,2,max)
minb <- apply(b,2,min)
P <- (1-(pnorm(minb)-pnorm(maxa))*(maxa <= minb))*w
p <- c(p,P)
if (ncov>0) {
for (j in 1:ncov) {
maxca <- (lower[argb[j]] - h[argb[j],])/(d[1]*U[argb[j],1])
mincb <- (upper[argb[j]] - h[argb[j],])/(d[1]*U[argb[j],1])
pcontrol[((i-1)*R+1):(i*R),j] <- (1-(pnorm(mincb)-pnorm(maxca))*(maxca <= mincb))*w -pmeancontrol[j]
}
}
}
}
pc = p
if (ncov>0) {
m <- lm(p~pcontrol)
beta <- as.vector(m$coefficients)
pc <- beta[1]
}
list(p=p,pc=pc)
}
########################################################################
######### coordinating the four variance reduction techniques ##########
########################################################################
SVD_split_cond_IS_row2 <- function(ncov,sampleIS,R,U,d,var,lower,upper){
n=length(d)-1
checkp <- SVD_cond_IS_fast(50,50,1,U,d,lower,upper)
checkp <- mean(checkp$p)
if (checkp > 0.5){
timeIS <- system.time(optimalIS <- SVD_cond_IS_fast2(floor(sampleIS/(3*10)),floor(sampleIS/3),var,U,d,lower,upper))[1]
s <- optimalIS$S
optimalSplit <- SVD_cond_split_optimal_few2(R,U,d,s,lower,upper)
nSplit= optimalSplit$timeSplit
a=n-nSplit
b=nSplit
optS <- max(1,floor(optimalSplit$optimalS))
ratio <- (optS * nSplit +(n-nSplit))/n
optR <- floor(sampleIS/ratio)
timeSplit <- system.time(Split <- SVD_cond_split_control_row2(ncov,nSplit,optR,optS,U,d,s,lower,upper))[1]
psplit <- Split$p
pcov <- Split$pc
pcov <- 1-pcov
}
if (checkp <= 0.5){
timeIS <- system.time(optimalIS <- SVD_cond_IS_fast(floor(sampleIS/(3*10)),floor(sampleIS/3),var,U,d,lower,upper))[1]
s <- optimalIS$S
optimalSplit <- SVD_cond_split_optimal_few(R,U,d,s,lower,upper)
nSplit= optimalSplit$timeSplit
a=n-nSplit
b=nSplit
optS <- max(1,floor(optimalSplit$optimalS))
ratio <- (optS * nSplit +(n-nSplit))/n
optR <- floor(sampleIS/ratio)
timeSplit <- system.time(Split <- SVD_cond_split_control_row(ncov,nSplit,optR,optS,U,d,s,lower,upper))[1]
psplit <- Split$p
pcov <- Split$pc
}
mean(pcov)
}
####################################################################
############# main function, computing the svd #####################
####################################################################
psvdnorm <- function(lower,upper,Sigma=diag(rep(1,length(lower))),Nsample=10000,Ncov=1){
svdS <- svd(Sigma)
U <- svdS$u
d <- sqrt(svdS$d)
SVD_split_cond_IS_row2(Ncov,Nsample,1000,U,d,1,lower,upper)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.