# montecarlotest.R
## Dependencies: snowfall (parallel computing)
## requries loglik and getParameters to be defined functions. Here we just set up there generics.
# loglik isn't a standard S3 generic, so we add it:
loglik <- function(x, y, ...) UseMethod("loglik")
# getParameters isn't a standard S3 generic, so we add it:
getParameters <- function(x, y, ...) UseMethod("getParameters")
montecarlotest <- function(null, test, nboot = 100, cpu = 2,
GetParNames=TRUE, times=NA, ...){
# bootstrap the likelihood ratio
# Args:
# null: a model with methods "simulate", "update", "loglik", "getParameters"
# test: another such model
# nboot: number of replicates
# cpu: number of cpus available on the machine. Note: if sfInit has already
# been called and not stopped, those conditions will be used and this
# will be ignored.
# GetParNames: optionally attempt to report parameter names in the bootstrap
# ...: options passed to the optim routine
# times: simulations can be sampled at specified times. If NA, will draw
# sample points in simulating the process that match those of the
# original data. If just a scalar number is given, will use the same
# interval, but draw that number of sample points. If a numeric
# vector is given, it will sample at those time intervals.
#
# FIXME needs to be extended to sample at times longer/shorter
# than original domain. this is really a function of the simulate
# method for the model objects, the montecarlotest function just
# passes this to the simulation function.
#
# Returns:
# A list with the following objects:
# null, test: the models passed in as null and test,
# nboot: number of replicate simulations used,
# null_dist, test_dist: The distribution of Likelihood ratios observed
# when simulating under the respective model
# null_par_dist: distribution of parameters observed for null model,
# simulating under the null model,
# test_par_dist: likewise for test model
# null_sim_test_pars: parameters estimated for the test model over
# simulations for the null model
# test_sim_null_pars: parameters estimated fro the null model over
# simulations for the test model.
#
## are we in parallel already? If not, how many cpus do we have?
if(cpu>1 & !sfIsRunning()){
sfInit(parallel=TRUE, cpu=cpu)
sfLibrary(warningsignals)
sfExportAll()
} else if(cpu<2 & !sfIsRunning()){
sfInit()
}
## generate each distribution
null_sim <- sfSapply(1:nboot, function(i){
data <- simulate(null, times=times)
if (is(data,"list")) data <- data$rep.1 # handle data that has replicates
null <- update(null, data, ...)
test <- update(test, data, ...)
lr <- -2*(loglik(null) - loglik(test))
list(lr, getParameters(null), getParameters(test))
})
test_sim <- sfSapply(1:nboot, function(i){
data <- simulate(test, times=times)
if (is(data,"list")) data <- data$rep.1 # handle data that has replicates
null <- update(null, data, ...)
test <- update(test, data, ...)
lr <- -2*(loglik(null) - loglik(test))
list(lr, getParameters(null), getParameters(test))
})
## grab the distribution of likelihood ratios
null_dist <- unlist(null_sim[1,])
test_dist <- unlist(test_sim[1,])
## grab dist of pars observed for null model under null model sims
null_bootstrap_pars <- matrix( unlist(null_sim[2,]), ncol=nboot)
if(GetParNames) rownames(null_bootstrap_pars) <- names(getParameters(null))
## grab dist of pars observed for test model under test model sims
test_bootstrap_pars <- matrix( unlist(test_sim[3,]), ncol=nboot)
if(GetParNames) rownames(test_bootstrap_pars) <- names(getParameters(test))
### Grab the cross-case bootstraps too:
## grab dist of pars observed for test model under null model sims
null_sim_test_pars <- matrix( unlist(null_sim[3,]), ncol=nboot)
if(GetParNames) rownames(null_sim_test_pars) <- names(getParameters(test))
## grab dist of pars observed for null model under test model sims
test_sim_null_pars <- matrix( unlist(test_sim[2,]), ncol=nboot)
if(GetParNames) rownames(test_sim_null_pars) <- names(getParameters(null))
## format the output
output <- list(null=null, test=test, nboot=nboot,
null_dist=null_dist, test_dist=test_dist,
null_par_dist = null_bootstrap_pars,
test_par_dist = test_bootstrap_pars,
null_sim_test_pars = null_sim_test_pars,
test_sim_null_pars = test_sim_null_pars)
class(output) <- "pow"
output
}
KL_divergence <- function(null_dist, test_dist){
nd <- density(null_dist)
td <- density(test_dist)
P <- nd$y[nd$y > 0 & td$y > 0]
Q <- td$y[nd$y > 0 & td$y > 0]
KL_div <- P %*% log(P/Q)
KL_div
}
overlap <- function(pow, bw="nrd0"){
nd <- density(pow$null_dist, bw=bw)
td <- density(pow$test_dist, bw=bw)
td$y %*% nd$y
}
## DOF calculation ##!! Should be made into a generic!!
dof <- function(object){
dof <- object$k
if(is.null(object$k)){
print(paste("cannot determine degrees of freedom, k,
please input for model m as m$k"))
}
dof
}
## can now specify the info criterion
## plotting function
plot.pow <- function(pow, threshold=.95, main="", legend=FALSE, type="density",
test_dist=TRUE, shade_power=FALSE, shade_p=FALSE,
show_aic=FALSE, show_data=TRUE, shade=TRUE,
shade_aic=FALSE, print_text=TRUE, show_text = c("p"),
xlim=NULL, ylim=NULL, null_dist=TRUE, bw = "nrd0",
info_criterion=c("aic", "bic", "aicc", "threshold"), ...){
## Calculate the densities
nd <- density(pow$null_dist, bw=bw)
td <- density(pow$test_dist, bw=bw)
n_null <- length(pow$null_dist)
n_test <- length(pow$test_dist)
## Select the information criterion
info_criterion = match.arg(info_criterion)
if(info_criterion=="aic"){
threshold_mark <- 2*(dof(pow$test) - dof(pow$null))
} else if(info_criterion=="threshold") {
threshold_tail <- sort(pow$null_dist)[ round(threshold*n_null) ]
threshold_mark <- threshold_tail #nd$x[tmp]
print(paste("threshold", threshold_mark))
}
## Calculate statistics FIXME (should be a seperate function of pow)
threshold_tail <- sort(pow$null_dist)[ round(threshold*n_null) ]
power <- sum(pow$test_dist > threshold_tail)/n_test
lr <- -2*(loglik(pow$null) - loglik(pow$test))
p <- 1-sum(pow$null_dist < lr)/pow$n_null
print(paste("power is ", power, ", p = ", p))
# Check the reverse test (equivalent to swapping null and test)
reverse_p <- sum(pow$test_dist < lr)/n_test
reverse_threshold_tail <- sort(-pow$test_dist)[ round(threshold*n_test) ]
reverse_power <- sum(-pow$null_dist > reverse_threshold_tail)/n_null
print(paste("reverse test power ", reverse_power,
", reverse test p = ", reverse_p))
aic_wrong <- sum(pow$null_dist > threshold_mark)/n_null
aic_power <- sum(pow$test_dist > threshold_mark)/n_test
## Calculate Axis Limits
if(is.null(xlim)) xlim <- c( min(pow$null_dist, pow$test_dist),
max(pow$null_dist, pow$test_dist) )
if(is.null(ylim)) ylim <- c(min(td$y, nd$y), max(td$y,nd$y))
## Density plots
if(type != "hist"){
if(shade_aic==TRUE | shade_power==TRUE){
plottype="s"
} else {
plottype ="n"
}
## Plot the null distribution with appropriate shading
if(null_dist){
plot(nd, xlim=xlim, ylim=ylim, main=main, type=plottype,
col=rgb(0,0,1,1), ...)
if(shade_p){
shade_p <- which(nd$x > pow$lr)
polygon(c(pow$lr,nd$x[shade_p]), c(0,nd$y[shade_p]),
col=rgb(0,0,1,.5), border=rgb(0,0,1,1))
} else if(shade){
polygon(nd$x, nd$y, col=rgb(0,0,1,.5), border=rgb(0,0,1,1))
}
} else {
plot(0,0, xlim=xlim, ylim = ylim, main=main, xlab=" Likelihood Ratio",
...) ## blank plot
}
## Plot the test distribution with appropriate shading
if(test_dist){
if(!null_dist) plot(td, xlim=xlim, main=main, type=plottype,
col=rgb(1,0,0,1), ...) ## just plot test dist
else lines(td, type=plottype, col=rgb(1,0,0,1))
threshold_tail <- sort(pow$null_dist)[ round(threshold*n_null) ]
if(shade_power){
shade_power <- which(td$x > threshold_tail)
polygon(c(threshold_tail, td$x[shade_power]), c(0,td$y[shade_power]),
col=rgb(1,0,0,.5), border=rgb(1,0,0,1))
} else if(shade){
polygon(td$x, td$y, col=rgb(1,0,0,.5), border=rgb(1,0,0,1))
}
}
## AIC shading
if(shade_aic){
shade_p <- which(nd$x > threshold_mark)
polygon(c(threshold_mark,nd$x[shade_p]), c(0,nd$y[shade_p]),
col=rgb(1,.5,0,.5), border=rgb(0,0,1,0))
if(test_dist){ # If test_dist on, shade the errors under the test too
shade_p <- which(td$x < threshold_mark)
polygon(c(threshold_mark,td$x[shade_p]), c(0,td$y[shade_p]),
col=rgb(1,1,0,.5), border=rgb(1,0,0,0))
}
}
## Plot histograms instead of density plots
} else {
hist(pow$null_dist, xlim=xlim, lwd=3, col=rgb(0,0,1,.5),
border="white", main=main, ...)
if(test_dist){
hist(pow$test_dist, add=T, lwd=0, col=rgb(1,0,0,.5),
border="white", main=main, ...)
}
}
## Add lines to plots
if(show_data){
#abline(v=pow$lr, lwd=3, col="darkred", lty=2 )
points(lr,yshift(1), cex=1.5, col="black", pch=25, fg="black",
bg="black")
}
if(show_aic){
# abline(v=threshold_mark, lwd=3, col="darkgray", lty=3)
points(threshold_mark,yshift(1), cex=1.5, col="red", pch=25, fg="red", bg="red")
}
## add legend
if(legend){
if (shade==TRUE){
legend("topright", c("sim under Null", "sim under Test", "observed"),
pch=c(15,15,25), fg=c("white", "white", "black"),
col=c(rgb(0,0,1,.5), rgb(1,0,0,.5), "black"))
}
else if (shade_aic==TRUE & test_dist==TRUE){
legend("topright",
c( paste("False Alarms (", round(aic_wrong*100,3),
"%)", sep=""),
paste("Missed Events (", round((1-aic_power)*100,3),
"%)", sep="")),
pch=c(15,15), col=c(rgb(1,.5,0,.5), rgb(1,1,0,.5)))
}
else if (shade_aic==TRUE & test_dist==FALSE){
legend("topright", paste("False Alarms (", round(aic_wrong*100,3),
"%)", sep=""), pch=15, col=rgb(1,.5,0,.5))
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.