Nothing
#' Running an extreme value and zero inflated negative binomial model
#'
#' @param formula_nb Formula for the negative binomial (count) component of the model
#' @param formula_evi Formula for the extreme-value inflation component of the model. If NULL taken as the same formula as nb
#' @param formula_pareto Formula for the pareto (extreme value) component of the model If NULL taken as the same formula as nb
#' @param data Data to run the model on
#' @param max.diff.par Tolerance for EM algorithm. Will be considered to have converged if the maximum absolute difference in the parameter estimates are lower than this value
#' @param max.no.em.steps Maximum number of EM steps to run. Will be considered to not have converged if this number is reached and convergence is not reached
#' @param max.no.em.steps.warmup Number of EM steps in the warmup rounds
#' @param c.lim Integer range defining the possible values of C
#' @param max.upd.par.pl.multinomial Maximum parameter change step size in the extreme value inflation component
#' @param max.upd.par.nb Maximum parameter change step size in the count component
#' @param max.upd.par.pl Maximum parameter change step size in the pareto component
#' @param no.m.bfgs.steps.multinomial Number of BFGS steps for the multinomial model
#' @param no.m.bfgs.steps.nb Number of BFGS steps for the negative binomial model
#' @param no.m.bfgs.steps.pl Number of BFGS steps for the pareto model
#' @param pdf.pl.type Probability density function type for the pareto component. Either 'approx' or 'exact'. 'approx' is adviced in most cases
#' @param eta.int Initial values for eta
#' @param init.Beta.multinom.PL Initial values for beta parameters in the extreme value inflation component. Vector of same length as number of parameters in the extreme value inflation component or NULL (which gives starting values of 0)
#' @param init.Beta.NB Initial values for beta parameters in the count component. Vector of same length as number of parameters in the count component or NULL (which gives starting values of 0)
#' @param init.Beta.PL Initial values for beta parameters in the pareto component. Vector of same length as number of parameters in the pareto component or NULL (which gives starting values of 0)
#' @param init.Alpha.NB Initial value of Alpha NB, integer or NULL (giving a starting value of 0)
#' @param init.C Initial value of C. Integer which should be within the C_lim range.
#'
#' @return An object of class 'evinf' containing XX
#' @noRd
run_evinb <- function(formula_nb,
formula_evi = NULL,
formula_pareto = NULL,
data,
max.diff.par = 1e-3,
max.no.em.steps = 200,
max.no.em.steps.warmup = 5,
c.lim=c(70,300),
max.upd.par.pl.multinomial=0.5,
max.upd.par.nb = 0.5,
max.upd.par.pl = 0.5,
no.m.bfgs.steps.multinomial=3,
no.m.bfgs.steps.nb = 3,
no.m.bfgs.steps.pl = 3,
pdf.pl.type=c("approx",'exact'),
eta.int = c(-1,1),
init.Beta.multinom.PL = NULL,
init.Beta.NB = NULL,
init.Beta.PL = NULL,
init.Alpha.NB = 0.01,
init.C = 200,
verbose = FALSE){
if(is.null(formula_evi)){
formula_evi <- formula_nb
}
if(is.null(formula_pareto)){
formula_pareto <- formula_nb
}
pdf.pl.type <- match.arg(pdf.pl.type,c("approx",'exact'))
mf_nb <- model.frame(formula_nb,data)
mf_zi <- model.frame(formula_nb,data)
mf_evi <- model.frame(formula_evi,data)
mf_pareto <- model.frame(formula_pareto,data)
OBS.Y <- as.matrix(model.response(mf_nb))
OBS.X.obj <- list()
OBS.X.obj$X.multinom.ZC <- as.matrix(mf_zi[,-1])
OBS.X.obj$X.multinom.PL <- as.matrix(mf_evi[,-1])
OBS.X.obj$X.NB <- as.matrix(mf_nb[,-1])
OBS.X.obj$X.PL <- as.matrix(mf_pareto[,-1])
Control <- list(max.diff.par = max.diff.par,
max.no.em.steps = max.no.em.steps,
max.no.em.steps.warmup = max.no.em.steps.warmup,
c.lim=c.lim,
max.upd.par.zc.multinomial=max.upd.par.pl.multinomial,
max.upd.par.pl.multinomial=max.upd.par.pl.multinomial,
max.upd.par.nb = max.upd.par.nb,
max.upd.par.pl = max.upd.par.pl,
no.m.bfgs.steps.multinomial=no.m.bfgs.steps.multinomial,
no.m.bfgs.steps.nb = no.m.bfgs.steps.nb,
no.m.bfgs.steps.pl = no.m.bfgs.steps.pl,
pdf.pl.type=pdf.pl.type,
eta.int = eta.int)
Ini.Val <- list()
Ini.Val$Beta.multinom.ZC <- rep(0,ncol(mf_zi))
Ini.Val$Beta.multinom.ZC[1] <- -100
if(is.null(init.Beta.multinom.PL) | length(init.Beta.multinom.PL) != ncol(mf_evi)){
Ini.Val$Beta.multinom.PL <- rep(0,ncol(mf_evi))
}else{
Ini.Val$Beta.multinom.PL <- init.Beta.multinom.PL
}
if(is.null(init.Beta.NB) | length(init.Beta.NB) != ncol(mf_evi)){
Ini.Val$Beta.NB <- rep(0,ncol(mf_nb))
}else{
Ini.Val$Beta.NB <- init.Beta.NB
}
if(is.null(init.Beta.PL) | length(init.Beta.PL) != ncol(mf_pareto)){
Ini.Val$Beta.PL <- rep(0,ncol(mf_pareto))
}else{
Ini.Val$Beta.PL <- init.Beta.PL
}
Ini.Val$Alpha.NB <- init.Alpha.NB
Ini.Val$C <- init.C
if(verbose){
object <- plinfl.nb.regression.fun(OBS.Y,OBS.X.obj,Ini.Val,Control)
}else{
capture.output(object <- plinfl.nb.regression.fun(OBS.Y,OBS.X.obj,Ini.Val,Control))
}
object$par.mat$Beta.multinom.ZC <- as.numeric(object$par.mat$Beta.multinom.ZC)
object$par.mat$Beta.multinom.PL <- as.numeric(object$par.mat$Beta.multinom.PL)
object$par.mat$Beta.NB <- as.numeric(object$par.mat$Beta.NB)
object$par.mat$Beta.PL <- as.numeric(object$par.mat$Beta.PL)
names(object$par.mat$Beta.NB) <- c('(Intercept)',all.vars(formula_nb)[-1])
#names(object$par.mat$Beta.multinom.ZC) <- c('(Intercept)',all.vars(formula_zi)[-1])
names(object$par.mat$Beta.multinom.PL) <- c('(Intercept)',all.vars(formula_evi)[-1])
names(object$par.mat$Beta.PL) <- c('(Intercept)',all.vars(formula_pareto)[-1])
object$formulas <- list(formula_nb = formula_nb,
#formula_zi = formula_zi,
formula_evi = formula_evi,
formula_pareto = formula_pareto)
object$data <- list()
object$data$data <- data %>% dplyr::select(dplyr::all_of(unique(c(all.vars(formula_nb),
#all.vars(formula_zi),
all.vars(formula_evi),
all.vars(formula_pareto))))) %>%
na.omit()
object$data$y <- as.numeric(object$y)
object$y <- NULL
object$data$x.nb <- object$x.nb
object$x.nb <- NULL
object$data$x.pl <- object$x.pl
object$x.pl <- NULL
object$data$x.multinom.pl <- object$x.multinom.pl
object$x.multinom.pl <- NULL
object$resp <- object$resp[,2:3]
object$props <- object$par.mat$Props[,2:3]
colnames(object$props) <- colnames(object$resp) <- c('count','pareto')
object$par.mat$Props <- NULL
object$coef <- object$par.mat
object$par.mat <- NULL
object$coef$Beta.multinom.ZC <- NULL
object$par_all <- object$par_all[length(Ini.Val$Beta.multinom.ZC)+1:length(object$par_all)]
object$fitted <- list()
object$fitted$y.hat.pl_exp.E.logy <- object$y.hat.plexpElogy
object$y.hat.plexpElogy <- NULL
object$fitted$y.hat.pl_E.inv.y <- object$y.hat.pl.E.inv.y
object$y.hat.pl.E.inv.y <- NULL
object$fitted$y.hat.pl_median <- object$y.hat.plmedian
object$y.hat.plmedian <- NULL
object$fitted$y.hat.pl_mean <- object$y.hat.plmean
object$y.hat.plmean <- NULL
object$fitted$mu.nb <- object$mu.nb.vec
object$mu.nb.vec <- NULL
object$fitted$alpha.pl <- object$alpha.pl.vec
object$alpha.pl.vec <- NULL
object$fitted$pl_exp.E.log.y <- object$exp.E.log.y
object$exp.E.log.y <- NULL
object$fitted$pl_median <- object$median.pl.vec
object$median.pl.vec <- NULL
object$fitted$pl_mean <- object$mean.pl.vec
object$mean.pl.vec <- NULL
object$fitted$prob_count <- object$props[,1]
object$fitted$prob_pareto <- object$props[,2]
object$fitted$posterior_count <- object$resp[,1]
object$fitted$posterior_pareto <- object$resp[,2]
class(object) <- 'evinb'
return(object)
}
#' A single bootstrap run on an ezinb object
#'
#' @param object The evinb object to run the bootstrap on
#' @param block Optional string specifying varible for block bootstrapping
#' @param timing Should time be kept
#' @param track_progress Should progress be tracked (experimental)
#' @param id Id when tracking progress
#' @param maxboot Number of bootstraps when tracking progress
#'
#' @return A bootstrapped evinb object
#'
#' @noRd
bootrun_evinb <- function(object,block = NULL, timing=TRUE, track_progress = FALSE, id=NULL, maxboot=NULL){
tim <- Sys.time()
if(is.null(block)){
boot_id <- sample(1:nrow(object$data$x.nb),nrow(object$data$x.nb),replace = T)
}else{
uniques <- unique(block)
boot_block_id <- sample(uniques,length(uniques),replace=T)
boot_id <- boot_block_id %>% purrr::map(~which(block == .x)) %>% purrr::reduce(c)
}
OBS.Y <- object$data$y[boot_id]
OBS.X.obj <- list()
OBS.X.obj$X.multinom.ZC <- object$data$x.nb[boot_id,] # note: only for functionality, does not update
OBS.X.obj$X.multinom.PL <- object$data$x.multinom.pl[boot_id,]
OBS.X.obj$X.NB <- object$data$x.nb[boot_id,]
OBS.X.obj$X.PL <- object$data$x.pl[boot_id,]
Control <- object$control
Ini.Val <- list()
Ini.Val$Beta.multinom.ZC <- c(-100,rep(0,ncol(OBS.X.obj$X.multinom.ZC)))
Ini.Val$Beta.multinom.PL <- as.numeric(object$coef$Beta.multinom.PL)
Ini.Val$Beta.NB <- as.numeric(object$coef$Beta.NB)
Ini.Val$Beta.PL <- as.numeric(object$coef$Beta.PL)
Ini.Val$Alpha.NB <- object$coef$Alpha.NB
Ini.Val$C <- object$coef$C
capture.output(evinb_boot <- plinfl.nb.regression.fun(OBS.Y,OBS.X.obj,Ini.Val,Control))
evinb_boot$par.mat$Beta.multinom.PL <- as.numeric(evinb_boot$par.mat$Beta.multinom.PL)
evinb_boot$par.mat$Beta.NB <- as.numeric(evinb_boot$par.mat$Beta.NB)
evinb_boot$par.mat$Beta.PL <- as.numeric(evinb_boot$par.mat$Beta.PL)
names(evinb_boot$par.mat$Beta.NB) <- c('(Intercept)',all.vars(object$formulas$formula_nb)[-1])
#names(evinb_boot$par.mat$Beta.multinom.ZC) <- c('(Intercept)',all.vars(object$formulas$formula_zi)[-1])
names(evinb_boot$par.mat$Beta.multinom.PL) <- c('(Intercept)',all.vars(object$formulas$formula_evi)[-1])
names(evinb_boot$par.mat$Beta.PL) <- c('(Intercept)',all.vars(object$formulas$formula_pareto)[-1])
evinb_boot$resp <- NULL
evinb_boot$props <- evinb_boot$par.mat$Props[,2:3]
evinb_boot$par.mat$Props <- NULL
evinb_boot$coef <- evinb_boot$par.mat
evinb_boot$par.mat <- NULL
evinb_boot$coef$Beta.multinom.ZC <- NULL
evinb_boot$formulas <- object$formulas
evinb_boot$y.hat.plexpElogy <- NULL
evinb_boot$y.hat.pl.E.inv.y <- NULL
evinb_boot$y.hat.plmedian <- NULL
evinb_boot$y.hat.plmean <- NULL
evinb_boot$mu.nb.vec <- NULL
evinb_boot$alpha.pl.vec <- NULL
evinb_boot$exp.E.log.y <- NULL
evinb_boot$median.pl.vec <- NULL
evinb_boot$mean.pl.vec <- NULL
evinb_boot$data <- NULL
evinb_boot$boot_id <- boot_id
class(evinb_boot) <- 'evinb_boot'
if(timing){
evinb_boot$time <- difftime(Sys.time(),tim,units='secs')
}
if(track_progress){
cat("\n ======= Bootstrap ",id, " of ", maxboot, "done in ", round(evinb_boot$time,1), "seconds ===== \n")
}
return(evinb_boot)
}
#' Running an extreme value inflated negative binomial model with bootstrapping
#'
#' @param formula_nb Formula for the negative binomial (count) component of the model
#' @param formula_evi Formula for the extreme-value inflation component of the model. If NULL taken as the same formula as nb
#' @param formula_pareto Formula for the pareto (extreme value) component of the model. If NULL taken as the same formula as nb
#' @param data Data to run the model on
#' @param bootstrap Should bootstrapping be performed. Needed to obtain standard errors and p-values
#' @param n_bootstraps Number of bootstraps to run. For use of bootstrapped p-values, at least 1,000 bootstraps are recommended. For approximate p-values, a lower number can be sufficient
#' @param multicore Should multiple cores be used?
#' @param ncores Number of cores if multicore is used. Default (NULL) is one less than the available number of cores
#' @param block Optional string indicating a case-identifier variable when using block bootstrapping
#' @param boot_seed Optional bootstrap seed to ensure reproducible results.
#' @param max.diff.par Tolerance for EM algorithm. Will be considered to have converged if the maximum absolute difference in the parameter estimates are lower than this value
#' @param max.no.em.steps Maximum number of EM steps to run. Will be considered to not have converged if this number is reached and convergence is not reached
#' @param max.no.em.steps.warmup Number of EM steps in the warmup rounds
#' @param c.lim Integer range defining the possible values of C
#' @param max.upd.par.pl.multinomial Maximum parameter change step size in the extreme value inflation component
#' @param max.upd.par.nb Maximum parameter change step size in the count component
#' @param max.upd.par.pl Maximum parameter change step size in the pareto component
#' @param no.m.bfgs.steps.multinomial Number of BFGS steps for the multinomial model
#' @param no.m.bfgs.steps.nb Number of BFGS steps for the negative binomial model
#' @param no.m.bfgs.steps.pl Number of BFGS steps for the pareto model
#' @param pdf.pl.type Probability density function type for the pareto component. Either 'approx' or 'exact'. 'approx' is adviced in most cases
#' @param eta.int Initial values for eta
#' @param init.Beta.multinom.PL Initial values for beta parameters in the extreme value inflation component. Vector of same length as number of parameters in the extreme value inflation component or NULL (which gives starting values of 0)
#' @param init.Beta.NB Initial values for beta parameters in the count component. Vector of same length as number of parameters in the count component or NULL (which gives starting values of 0)
#' @param init.Beta.PL Initial values for beta parameters in the pareto component. Vector of same length as number of parameters in the pareto component or NULL (which gives starting values of 0)
#' @param init.Alpha.NB Initial value of Alpha NB, integer or NULL (giving a starting value of 0)
#' @param init.C Initial value of C. Integer which should be within the C_lim range.
#' @param verbose Should progress be printed for the first run of evinb
#'
#' @importFrom foreach %do%
#' @importFrom foreach %dopar%
#'
#' @return An object of class 'evinb'
#' @export
#'
#' @examples
#' data(genevzinb2)
#' model <- evinb(y~x1+x2+x3,data=genevzinb2, n_bootstraps = 10)
#'
evinb <- function(formula_nb,
formula_evi = NULL,
formula_pareto = NULL,
data,
bootstrap = TRUE,
n_bootstraps = 100,
multicore = FALSE,
ncores = NULL,
block = NULL,
boot_seed = NULL,
max.diff.par = 1e-2,
max.no.em.steps = 500,
max.no.em.steps.warmup = 5,
c.lim=c(50,1000),
max.upd.par.pl.multinomial=0.5,
max.upd.par.nb = 0.5,
max.upd.par.pl = 0.5,
no.m.bfgs.steps.multinomial=3,
no.m.bfgs.steps.nb = 3,
no.m.bfgs.steps.pl = 3,
pdf.pl.type="approx",
eta.int = c(-1,1),
init.Beta.multinom.PL = NULL,
init.Beta.NB = NULL,
init.Beta.PL = NULL,
init.Alpha.NB = 0.01,
init.C = 200,
verbose = FALSE){
i <- 'temp_iter'
if(is.null(formula_evi)){
formula_evi <- formula_nb
}
if(is.null(formula_pareto)){
formula_pareto <- formula_nb
}
t1 <- Sys.time()
full_run <- run_evinb(formula_nb = formula_nb,
formula_evi = formula_evi,
formula_pareto = formula_pareto,
data = data,
max.diff.par = max.diff.par,
max.no.em.steps = max.no.em.steps,
max.no.em.steps.warmup = max.no.em.steps.warmup,
c.lim = c.lim,
max.upd.par.pl.multinomial = max.upd.par.pl.multinomial,
max.upd.par.nb = max.upd.par.nb,
max.upd.par.pl = max.upd.par.pl,
no.m.bfgs.steps.multinomial,
no.m.bfgs.steps.nb,
no.m.bfgs.steps.pl,
pdf.pl.type,
eta.int,
init.Beta.multinom.PL,
init.Beta.NB,
init.Beta.PL,
init.Alpha.NB,
init.C,
verbose = verbose)
runtime <- difftime(Sys.time(),t1)
full_run$block <- block
if(!is.null(block)){
if(is.character(block)){
block2 <- data %>% dplyr::select(dplyr::all_of(unique(c(all.vars(formula_nb),
#all.vars(formula_zi),
all.vars(formula_evi),
all.vars(formula_pareto),block)))) %>% na.omit() %>%
dplyr::select(dplyr::all_of(block)) %>% dplyr::pull()
full_run$data$data <- dplyr::bind_cols(data %>% dplyr::select(dplyr::all_of(unique(c(all.vars(formula_nb),
#all.vars(formula_zi),
all.vars(formula_evi),
all.vars(formula_pareto),block)))) %>% na.omit() %>%
dplyr::select(dplyr::all_of(block)),full_run$data$data)
}
}else{
block2 <- NULL
}
if(bootstrap){
if(multicore){
if(is.null(ncores)){
ncores <- parallel::detectCores()-1
}
if(.Platform$OS.type == 'windows'){
cl <- parallel::makeCluster(ncores)
doParallel::registerDoParallel(cl)
}else{
doParallel::registerDoParallel(cores = ncores)
}
ex_time <- runtime*n_bootstraps/ncores
}else{
ex_time <- runtime*n_bootstraps
}
cat("\n ======", "Approximate runtime for bootstraps is", ex_time, attributes(runtime)$units, ". Note: This is a very rough estimate of the runtime.")
if(verbose){
boots <- foreach::foreach(i=1:n_bootstraps,.options.RNG = boot_seed,.packages = 'evinf') %dorng%
try(bootrun_evinb(full_run,block2,track_progress = T, id = i, maxboot = n_bootstraps))
}else{
boots <- foreach::foreach(i=1:n_bootstraps,.options.RNG = boot_seed,.packages = 'evinf') %dorng%
try(bootrun_evinb(full_run,block2,track_progress = F, id = i, maxboot = n_bootstraps))
}
out <- c(full_run,
list(bootstraps = boots))
if(multicore){
if(.Platform$OS.type == 'windows'){
parallel::stopCluster(cl)
foreach::registerDoSEQ()
}else{
doParallel::stopImplicitCluster()
}
}
names(boots) <- paste('bootstrap_',1:length(boots),sep="")
}else{
out <- full_run
}
class(out) <- 'evinb'
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.