# montecarlotest.R
## Dependencies: snowfall (parallel computing)
## requries loglik and getParameters to be defined functions. Here we just set up there generics.
# loglik isn't a standard S3 generic, so we add it:
loglik <- function(x, y, ...) UseMethod("loglik")
# getParameters isn't a standard S3 generic, so we add it:
getParameters <- function(x, y, ...) UseMethod("getParameters")
## Functions compare_models and collect work to break up the
## function montecarlotest() so that it can be deployed on arbitrary
## parallel fashion/architecture. Typical use looks like this:
# #Parallelize comparison A,B in any fashion:
# Asims <- sfLapply(1:nboot, function(i) compare_models(A, B))
# #and maybe some more sims using snow package
# more_Asims <- parLapply(cluster, 1:nboot, function(i) compare_models(A, B))
# Bsims <- parLapply(cluster, 1:nboot, function(i) compare_models(B, A))
# #Concatentation will work if using Lapply loops only!!
# pow <- collect(c(Asims,more_Asims), Bsims, A, B)
# plot(pow)
compare_models <- function(A, B, interval=NA, ...){
# Function simulates data under model A,
# re-estimates model A and B from this data.
# Args:
# A: a model with methods update, simulate, loglik, getParameters
# B: another model with these methods
# interval: optional arg to simulate(A, interval), which may change
# the simulation interval. NA if not used.
# ...: additional options to update function
# Returns the 2*difference in log likelihood, as
# well as the estimated parameters of model A
# and B from the simulated data.
if(is.na(interval))
data <- simulate(A)
else
data <- simulate(A, interval)
if (is(data,"list"))
data <- data$rep.1 # handle data that has replicates
A <- update(A, data, ...)
B <- update(B, data, ...)
lr <- -2*(loglik(A) - loglik(B))
list(lr, getParameters(A), getParameters(B))
}
collect <- function(A_sim, B_sim, A, B){
if(! is(A_sim, "list"))
stop(paste("A_sim is in format", class(A_sim), "instead of list"))
if(! is(B_sim, "list"))
stop(paste("B_sim is in format", class(B_sim), "instead of list"))
## grab the distribution of likelihood ratios
A_dist <- sapply(A_sim, function(x) x[[1]])
B_dist <- sapply(B_sim, function(x) -x[[1]])
nboot <- length(A_dist)
## grab dist of pars observed for null model under null model sims
A_pars <- sapply(A_sim, function(x) x[[2]])
B_pars <- sapply(B_sim, function(x) x[[2]])
Asim_Bpars <- sapply(A_sim, function(x) x[[3]])
Bsim_Apars <- sapply(B_sim, function(x) x[[3]])
## format the output
output <- list(null=A, test=B, nboot=nboot,
null_dist=A_dist, test_dist=B_dist,
null_par_dist = A_pars,
test_par_dist = B_pars,
null_sim_test_pars = Asim_Bpars,
test_sim_null_pars = Bsim_Apars)
class(output) <- "pow"
output
}
montecarlotest <- function(null, test, nboot = 100, cpu = 2,
GetParNames=TRUE, times=NA, ...){
# bootstrap the likelihood ratio
# Args:
# null: a model with methods "simulate", "update", "loglik", "getParameters"
# test: another such model
# nboot: number of replicates
# cpu: number of cpus available on the machine. Note: if sfInit has already
# been called and not stopped, those conditions will be used and this
# will be ignored.
# GetParNames: optionally attempt to report parameter names in the bootstrap
# ...: options passed to the optim routine
# times: simulations can be sampled at specified times. If NA, will draw
# sample points in simulating the process that match those of the
# original data. If just a scalar number is given, will use the same
# interval, but draw that number of sample points. If a numeric
# vector is given, it will sample at those time intervals.
#
# FIXME needs to be extended to sample at times longer/shorter
# than original domain. this is really a function of the simulate
# method for the model objects, the montecarlotest function just
# passes this to the simulation function.
#
# Returns:
# A list with the following objects:
# null, test: the models passed in as null and test,
# nboot: number of replicate simulations used,
# null_dist, test_dist: The distribution of Likelihood ratios observed
# when simulating under the respective model
# null_par_dist: distribution of parameters observed for null model,
# simulating under the null model,
# test_par_dist: likewise for test model
# null_sim_test_pars: parameters estimated for the test model over
# simulations for the null model
# test_sim_null_pars: parameters estimated fro the null model over
# simulations for the test model.
#
## are we in parallel already? If not, how many cpus do we have?
if(cpu>1 & !sfIsRunning()){
sfInit(parallel=TRUE, cpu=cpu)
libs <- names(sessionInfo()[[5]])
for(lib in libs)
sfLibrary(lib, character.only=TRUE)
sfExportAll()
} else if(cpu<2 & !sfIsRunning()){
sfInit()
}
## generate each distribution
null_sim <- sfSapply(1:nboot, function(i){
if(is.na(times))
data <- simulate(null)
else
data <- simulate(null, times=times)
if (is(data,"list")) data <- data$rep.1 # handle data that has replicates
null <- update(null, data, ...)
test <- update(test, data, ...)
lr <- -2*(loglik(null) - loglik(test))
list(lr, getParameters(null), getParameters(test))
})
test_sim <- sfSapply(1:nboot, function(i){
if(is.na(times))
data <- simulate(test)
else
data <- simulate(test, times=times)
if (is(data,"list")) data <- data$rep.1 # handle data that has replicates
null <- update(null, data, ...)
test <- update(test, data, ...)
lr <- -2*(loglik(null) - loglik(test))
list(lr, getParameters(null), getParameters(test))
})
## grab the distribution of likelihood ratios
null_dist <- unlist(null_sim[1,])
test_dist <- unlist(test_sim[1,])
## grab dist of pars observed for null model under null model sims
null_bootstrap_pars <- matrix( unlist(null_sim[2,]), ncol=nboot)
if(GetParNames) rownames(null_bootstrap_pars) <- names(getParameters(null))
## grab dist of pars observed for test model under test model sims
test_bootstrap_pars <- matrix( unlist(test_sim[3,]), ncol=nboot)
if(GetParNames) rownames(test_bootstrap_pars) <- names(getParameters(test))
### Grab the cross-case bootstraps too:
## grab dist of pars observed for test model under null model sims
null_sim_test_pars <- matrix( unlist(null_sim[3,]), ncol=nboot)
if(GetParNames) rownames(null_sim_test_pars) <- names(getParameters(test))
## grab dist of pars observed for null model under test model sims
test_sim_null_pars <- matrix( unlist(test_sim[2,]), ncol=nboot)
if(GetParNames) rownames(test_sim_null_pars) <- names(getParameters(null))
## format the output
output <- list(null=null, test=test, nboot=nboot,
null_dist=null_dist, test_dist=test_dist,
null_par_dist = null_bootstrap_pars,
test_par_dist = test_bootstrap_pars,
null_sim_test_pars = null_sim_test_pars,
test_sim_null_pars = test_sim_null_pars)
class(output) <- "pow"
output
}
## DOF calculation ## FIXME should be more generic
dof <- function(object){
if(is(object, "fitContinuous")) dof<- object[[1]]$k
else if(is(object, "hansentree")){
dof <- length(object@sqrt.alpha)+length(object@sigma)+
sum(sapply(object@theta,length))
} else if(is(object, "browntree")) {
dof <- length(object@sigma)+sum(sapply(object@theta,length))
} else {
dof <- object$k
if(is.null(object$k))
print(paste("cannot determine degrees of freedom,i
please input for model"))
}
dof
}
## should also add the functions for summary parameter distributions
## Some generic overlap comparisons.
## should be replaced with ROC curve tools, (seperate file)
KL_divergence <- function(null_dist, test_dist){
nd <- density(null_dist)
td <- density(test_dist)
P <- nd$y[nd$y > 0 & td$y > 0]
Q <- td$y[nd$y > 0 & td$y > 0]
KL_div <- P %*% log(P/Q)
KL_div
}
overlap <- function(pow, bw="nrd0"){
nd <- density(pow$null_dist, bw=bw)
td <- density(pow$test_dist, bw=bw)
td$y %*% nd$y
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.