### iidJack.R ---
#----------------------------------------------------------------------
## author: Brice Ozenne
## created: jun 23 2017 (09:15)
## Version:
## last-updated: Jan 11 2022 (16:08)
## By: Brice Ozenne
## Update #: 333
#----------------------------------------------------------------------
##
### Commentary:
##
### Change Log:
#----------------------------------------------------------------------
##
### Code:
## * documentation - iidJack
#' @title Jackknife iid Decomposition from Model Object
#' @description Extract iid decomposition (i.e. influence function) from model object.
#'
#' @name iidJack
#'
#' @param object a object containing the model.
#' @param data [data.frame] dataset used to perform the jackknife.
#' @param grouping [vector] variable defining cluster of observations that will be simultaneously removed by the jackknife.
#' @param cpus [integer >0] the number of processors to use.
#' If greater than 1, the fit of the model and the computation of the influence function for each jackknife sample is performed in parallel.
#' @param keep.warnings [logical] keep warning messages obtained when estimating the model with the jackknife samples.
#' @param keep.error [logical]keep error messages obtained when estimating the model with the jackknife samples.
#' @param cl [cluster] a parallel socket cluster generated by \code{parallel::makeCluster}
#' that has been registered using \code{registerDoParallel}.
#' @param trace [logical] should a progress bar be used to trace the execution of the function
#' @param ... [internal] only used by the generic method.
#'
#' @return A matrix with in row the samples and in columns the parameters.
#'
#' @examples
#' n <- 20
#' set.seed(10)
#' mSim <- lvm(c(Y1,Y2,Y3,Y4,Y5) ~ 1*eta)
#' latent(mSim) <- ~eta
#' categorical(mSim, K=2) <- ~G
#' transform(mSim, Id ~ eta) <- function(x){1:NROW(x)}
#' dW <- lava::sim(mSim, n, latent = FALSE)
#'
#' #### LVM ####
#' \dontrun{
#' m1 <- lvm(c(Y1,Y2,Y3,Y4,Y5) ~ 1*eta)
#' latent(m1) <- ~eta
#' regression(m1) <- eta ~ G
#' e <- estimate(m1, data = dW)
#'
#' iid1 <- iidJack(e)
#' iid2 <- iid(e)
#' attr(iid2, "bread") <- NULL
#'
#' apply(iid1,2,sd)
#' apply(iid2,2,sd)
#' quantile(iid2 - iid1)
#' }
#'
#'
#' @concept iid decomposition
#' @export
iidJack <- function(object,...) UseMethod("iidJack")
## * method iidJack.default
#' @rdname iidJack
#' @export
iidJack.default <- function(object, data = NULL, grouping = NULL, cpus = 1,
keep.warnings = TRUE, keep.error = TRUE,
cl = NULL, trace = TRUE, ...) {
estimate.lvm <- lava_estimate.lvm
## ** test args
init.cpus <- (cpus>1 && is.null(cl))
if(init.cpus){
test.package <- try(requireNamespace("foreach"), silent = TRUE)
if(inherits(test.package,"try-error")){
stop("There is no package \'foreach\' \n",
"This package is necessary when argument \'cpus\' is greater than 1 \n")
}
if(cpus > parallel::detectCores()){
stop("Argument \'cpus\' is greater than the number of available CPU cores \n",
"available CPU cores: ",parallel::detectCores(),"\n")
}
}
## ** extract data
if(is.null(data)){
myData <- extractData(object, design.matrix = FALSE, as.data.frame = TRUE,
envir = parent.env(environment()))
}else{
myData <- as.data.frame(data)
}
n.obs <- NROW(myData)
if(any(class(object) %in% "lme")){
getCoef <- nlme::fixef
}else{
getCoef <- coef
}
coef.x <- getCoef(object)
names.coef <- names(coef.x)
n.coef <- length(coef.x)
## ** update formula/model when defined by a variable and not in the current namespace
if(length(object$call[[2]])==1){
modelName <- as.character(object$call[[2]])
if(modelName %in% ls() == FALSE){
assign(modelName, value = evalInParentEnv(object$call[[2]]))
}
}
## ** define the grouping level for the data
if(is.null(grouping)){
myData$XXXgroupingXXX <- 1:n.obs
grouping <- "XXXgroupingXXX"
}else{
if(length(grouping)>1){
stop("grouping must refer to only one variable \n")
}
if(grouping %in% names(myData) == FALSE){
stop("variable defined in grouping not found in data \n")
}
}
myData[,grouping] <- as.character(myData[,grouping])
Ugrouping <- unique(myData[,grouping])
n.group <- length(Ugrouping)
## ** warper
warper <- function(i){ # i <- "31"
newData <- subset(myData, subset = myData[[grouping]]!=i)
xnew <- tryWithWarnings(stats::update(object, data = newData))
if(!is.null(xnew$error)){
xnew$value <- rep(NA, n.coef)
}else{
xnew$value <- getCoef(xnew$value)
}
return(xnew)
}
# warper("31")
## ** start parallel computation
if(init.cpus){
## define cluster
if(trace>0){
cl <- parallel::makeCluster(cpus, outfile = "")
}else{
cl <- parallel::makeCluster(cpus)
}
## link to foreach
doParallel::registerDoParallel(cl)
}
## *** link to foreach
doParallel::registerDoParallel(cl)
## ** parallel computations: get jackknife coef
if(cpus>1){
if(trace>0){
pb <- utils::txtProgressBar(max = n.group, style = 3)
}
## *** packages/objects to export
estimator <- as.character(object$call[[1]])
vec.packages <- c("lava")
possiblePackage <- gsub("package:","",utils::getAnywhere(estimator)$where[1])
existingPackage <- as.character(utils::installed.packages()[,"Package"])
ls.call <- as.list(object$call)
test.length <- which(unlist(lapply(ls.call, length))==1)
test.class <- which(unlist(lapply(ls.call, function(cc){
(class(c) %in% c("numeric","character","logical")) == FALSE
})))
test.class <- which(unlist(lapply(ls.call, class)) %in% c("numeric","character","logical") == FALSE)
indexExport <- intersect(test.class,test.length)
toExport <- sapply(ls.call[indexExport], as.character)
if(possiblePackage %in% existingPackage){
vec.packages <- c(vec.packages,possiblePackage)
}
parallel::clusterCall(cl, fun = function(x){
sapply(vec.packages, function(iP){
suppressPackageStartupMessages(requireNamespace(iP, quietly = TRUE))
})
})
## *** objects to export
if(length(object$call$data)==1){
toExport <- c(toExport,as.character(object$call$data))
}
if(length(object$call$formula)==1){
toExport <- c(toExport,as.character(object$call$formula))
}
if(length(object$call$fixed)==1){
toExport <- c(toExport,as.character(object$call$fixed))
}
toExport <- c(unique(toExport),"tryWithWarnings")
## *** parallel computation
i <- NULL # [:for CRAN check] foreach
resLoop <- foreach::`%dopar%`(
foreach::foreach(i = 1:n.group,
.export = toExport),{
if(trace>0){utils::setTxtProgressBar(pb, i)}
return(warper(Ugrouping[i]))
})
if(trace>0){close(pb)}
}else{
if(trace>0){
test.package <- try(requireNamespace("pbapply"), silent = TRUE)
if(inherits(test.package,"try-error")){
stop("There is no package \'pbapply\' \n",
"This package is necessary when argument \'trace\' is TRUE \n")
}
resLoop <- pbapply::pblapply(Ugrouping, warper)
}else{
resLoop <- lapply(Ugrouping, warper)
}
}
coefJack <- do.call(rbind, lapply(resLoop,"[[","value"))
rownames(coefJack) <- 1:n.group
## ** end parallel computation
if(init.cpus){
parallel::stopCluster(cl)
}
## ** post treatment: from jackknife coef to the iid decomposition
# defined as (n-1)*(coef-coef(-i))
# division by n to match output of lava, i.e. IF/n
iidJack <- -(n.group-1)/n.group * sweep(coefJack, MARGIN = 2, STATS = coef.x, FUN = "-")
colnames(iidJack) <- names.coef
if(keep.warnings){
ls.warnings <- lapply(resLoop,"[[","warnings")
names(ls.warnings) <- 1:n.group
ls.warnings <- Filter(Negate(is.null), ls.warnings)
attr(iidJack,"warnings") <- ls.warnings
}
if(keep.error){
ls.error <- lapply(resLoop,"[[","error")
names(ls.error) <- 1:n.group
ls.error <- Filter(Negate(is.null), ls.error)
attr(iidJack,"error") <- ls.error
}
return(iidJack)
}
#----------------------------------------------------------------------
### iidJack.R ends here
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.