R/lav_bootstrap_lrt.R

## YR this files needs updating! should be merged with
## lav_bootstrap.R

bootstrapLRT <- function (h0 = NULL, h1 = NULL, R = 1000L,
                          type = "bollen.stine", verbose = FALSE,
                          return.LRT = FALSE,
                          double.bootstrap = "no",
                          double.bootstrap.R = 500L,
                          double.bootstrap.alpha = 0.05,
                          parallel = c("no", "multicore", "snow"),
                          ncpus = 1L,
                          cl = NULL,
                          iseed = NULL)
{
    # checks
    type <- tolower(type)
    stopifnot(inherits(h0, "lavaan"),
              inherits(h1, "lavaan"),
              type %in% c("bollen.stine", "parametric", "yuan", "nonparametric",
                          "ordinary"),
              double.bootstrap %in% c("no", "FDB", "standard"))
    if(type == "nonparametric") type <- "ordinary"

    # check for conditional.x = TRUE
    if(h0@Model@conditional.x) {
        stop("lavaan ERROR: this function is not (yet) available if conditional.x = TRUE")
    }

    # prepare
    LRT <- rep(as.numeric(NA), R)
    if((h1@optim$fx - h0@optim$fx) > sqrt(.Machine$double.eps)) {
        # restricted fit should not be better!
        cat(" ... h0@optim$fx = ", h0@optim$fx, "h1@optim$fx = ", h1@optim$fx,
            "h0 should not be better!\n")
        return(NULL)
    }
    LRT.original <- abs(anova(h1, h0)$`Chisq diff`[2L])
    # abs only needed because df may be the same for both models!


    if(double.bootstrap == "FDB") {
        LRT.2 <- numeric(R)
    } else if(double.bootstrap == "standard") {
        plugin.pvalues <- numeric(R)
    }


    # prepare for parallel processing
    if(missing(parallel)) parallel <- "no"
    parallel <- match.arg(parallel)
    have_mc <- have_snow <- FALSE
    if(parallel != "no" && ncpus > 1L) {
        if(parallel == "multicore")
            have_mc <- .Platform$OS.type != "windows"
        else if(parallel == "snow")
            have_snow <- TRUE
        if(!have_mc && !have_snow)
            ncpus <- 1L
        loadNamespace("parallel")
    }

    #data
    data <- h0@Data

    #Compute covariance matrix and additional mean vector
    if(type == "bollen.stine" || type == "parametric" || type == "yuan") {
        Sigma.hat <- computeSigmaHat(lavmodel = h0@Model)
        Mu.hat    <- computeMuHat(lavmodel = h0@Model)
    }

    # can we use the original data, or do we need to transform it first?
    if(type == "bollen.stine" || type == "yuan") {
        # check if data is complete
        if(h0@Options$missing != "listwise")
            stop("lavaan ERROR: bollen.stine/yuan bootstrap not available for missing data")
        dataX <- vector("list", length=data@ngroups)
    } else {
        dataX <- data@X
    }
    lavdata <- h0@Data
    lavoptions <- h0@Options

    #Bollen-Stine data transformation
    if(type == "bollen.stine") {
        for(g in 1:h0@Data@ngroups) {
            sigma.sqrt <- lav_matrix_symmetric_sqrt(     Sigma.hat[[g]])
            S.inv.sqrt <- lav_matrix_symmetric_sqrt(h0@SampleStats@icov[[g]])

            # center
            X <- scale(data@X[[g]], center = TRUE, scale = FALSE)

            # transform
            X <- X %*% S.inv.sqrt %*% sigma.sqrt

            # add model based mean
            if (h0@Model@meanstructure)
                X <- scale(X, center = (-1 * Mu.hat[[g]]), scale = FALSE)

            # transformed data
            dataX[[g]] <- X
        }

    # Yuan et al data transformation
    } else if((type == "yuan")) {
        # page numbers refer to Yuan et al, 2007
        # Define a function to find appropriate value of a
        # (p. 272)
        g.a <- function(a, Sigmahat, Sigmahat.inv, S, tau.hat, p){
            S.a <- a*S + (1-a)*Sigmahat
            tmp.term <- S.a %*% Sigmahat.inv
            res1 <- (sum(diag(tmp.term)) - log(det(tmp.term)) - p) - tau.hat
            res <- res1*res1
            # From p 272
            attr(res, "gradient") <- sum(diag((S - Sigmahat) %*%
                                     (Sigmahat.inv - chol2inv(chol(S.a)))))
            res
        }

        # Now use g.a within each group
        for(g in 1:h0@Data@ngroups) {
            S <- h0@SampleStats@cov[[g]]
            # test is in Fit slot
            ghat <- h0@test[[1]]$stat.group[[g]]
            df <- h0@test[[1]]$df
            Sigmahat <- Sigma.hat[[g]]
            Sigmahat.inv <- inv.chol(Sigmahat)
            nmv <- nrow(Sigmahat)
            n <- data@nobs[[g]]

            # Calculate tauhat_1, middle p. 267.
            # Yuan et al note that tauhat_1 could be negative;
            # if so, we need to let S.a = Sigmahat. (see middle p 275)
            tau.hat <- (ghat - df)/(n-1)

            if (tau.hat >= 0){
              # Find a to minimize g.a
              a <- optimize(g.a, c(0,1), Sigmahat, Sigmahat.inv,
                            S, tau.hat, nmv)$minimum

              # Calculate S_a (p. 267)
              S.a <- a*S + (1-a)*Sigmahat
            } else {
              S.a <- Sigmahat
            }

            # Transform the data (p. 263)
            S.a.sqrt <- lav_matrix_symmetric_sqrt(S.a)
            S.inv.sqrt <- lav_matrix_symmetric_sqrt(h0@SampleStats@icov[[g]])

            X <- data@X[[g]]
            X <- X %*% S.inv.sqrt %*% S.a.sqrt

            # transformed data
            dataX[[g]] <- X
        }
    }

    # run bootstraps
    fn <- function(b) {
        if (type == "bollen.stine" || type == "ordinary" || type == "yuan") {
            # take a bootstrap sample for each group
            BOOT.idx <- vector("list", length = lavdata@ngroups)
            for(g in 1:lavdata@ngroups) {
                stopifnot(lavdata@nobs[[g]] > 1L)
                boot.idx <- sample(x=lavdata@nobs[[g]],
                                   size=lavdata@nobs[[g]], replace=TRUE)
                BOOT.idx[[g]] <- boot.idx
                dataX[[g]] <- dataX[[g]][boot.idx,,drop=FALSE]
            }
            newData <- lav_data_update(lavdata = lavdata, newX = dataX,
                                       BOOT.idx = BOOT.idx,
                                       lavoptions = lavoptions)

        } else { # parametric!
            for(g in 1:lavdata@ngroups) {
                dataX[[g]] <- MASS::mvrnorm(n     = lavdata@nobs[[g]],
                                            Sigma = Sigma.hat[[g]],
                                            mu    = Mu.hat[[g]])
            }
            newData <- lav_data_update(lavdata = lavdata, newX = dataX,
                                       lavoptions = lavoptions)
        }

        # verbose
        if (verbose) cat("  ... bootstrap draw number: ", b, "\n")

        #Get sample statistics
        bootSampleStats <- try(lav_samplestats_from_data(
                               lavdata       = newData,
                               lavoptions    = lavoptions), silent = TRUE)
        if (inherits(bootSampleStats, "try-error")) {
            if (verbose) cat("     FAILED: creating h0@SampleStats statistics\n")
            return(NULL)
        }

        if (verbose) cat("  ... ... model h0: ")
        h0@Options$verbose <- FALSE
        h0@Options$se <- "none"
        h0@Options$test <- "standard"
        h0@Options$baseline <- FALSE
        h0@Options$h1 <- FALSE

        #Fit h0 model
        fit.h0 <- suppressWarnings(lavaan(slotOptions     = h0@Options,
                                          slotParTable    = h0@ParTable,
                                          slotSampleStats = bootSampleStats,
                                          slotData        = data))
        if (!fit.h0@optim$converged) {
            if (verbose) cat("     FAILED: no convergence\n")
            return(NULL)
        }
        if (verbose)
            cat("     ok -- niter = ", fit.h0@optim$iterations,
                " fx = ", fit.h0@optim$fx, "\n")

        if (verbose) cat("  ... ... model h1: ")
        h1@Options$verbose <- FALSE
        h1@Options$se <- "none"
        h1@Options$test <- "standard"
        h1@Options$baseline <- FALSE
        h1@Options$h1 <- FALSE

        #Fit h1 model
        fit.h1 <- suppressWarnings(lavaan(slotOptions     = h1@Options,
                                          slotParTable    = h1@ParTable,
                                          slotSampleStats = bootSampleStats,
                                          slotData        = data))

        if (!fit.h1@optim$converged) {
            if (verbose)
                cat("     FAILED: no convergence -- niter = ", fit.h1@optim$iterations,
                    " fx = ", fit.h1@optim$fx,"\n")
            return(NULL)
        }
        if (verbose)
            cat("     ok -- niter = ", fit.h1@optim$iterations,
                " fx = ", fit.h1@optim$fx, "\n")

        # store LRT
        if((fit.h1@optim$fx - fit.h0@optim$fx) > sqrt(.Machine$double.eps)) {
            #if((fit.h1@optim$fx - fit.h0@optim$fx) > 0.0) {
            if (verbose)
                cat("  ... ... LRT  = <NA> h0 > h1, delta = ", fit.h1@optim$fx - fit.h0@optim$fx, "\n")
            return(NULL)
        } else {
            lrt.boot <- abs(anova(fit.h1, fit.h0)$`Chisq diff`[2L])
            if (verbose)
                cat("  ... ... LRT  = ", lrt.boot, "\n")
        }

        #double bootstrap
        if (double.bootstrap == "standard") {
            if (verbose) cat("  ... ... calibrating p.value - ")

            plugin.pvalue <- bootstrapLRT(h0 = fit.h0, h1 = fit.h1,
                                          R = double.bootstrap.R,
                                          type = type,
                                          verbose = FALSE,
                                          return.LRT = FALSE, #FALSE
                                          parallel = parallel,
                                          ncpus = ncpus, cl = cl,
                                          double.bootstrap = "no")
            if (verbose) cat(sprintf("%5.3f", plugin.pvalue), "\n")
            attr(lrt.boot, "plugin.pvalue") <- plugin.pvalue
        } else if (double.bootstrap == "FDB") {
            #Fast double bootstrap
            plugin.pvalue <- bootstrapLRT(h0 = fit.h0, h1 = fit.h1,
                                          R = 1L,
                                          type = type,
                                          verbose = FALSE,
                                          return.LRT = TRUE, #TRUE
                                          parallel = parallel,
                                          ncpus = ncpus, cl = cl,
                                          double.bootstrap = "no")

            LRT.2 <- attr(plugin.pvalue, "LRT")

            if (verbose) cat("  ... ... LRT2 = ", LRT.2, "\n")
            attr(lrt.boot, "LRT.2") <- LRT.2
        }
        lrt.boot
    }

    #Parallel processing
    RR <- sum(R)
    res <- if (ncpus > 1L && (have_mc || have_snow)) {
        if (have_mc) {
            parallel::mclapply(seq_len(RR), fn, mc.cores = ncpus)
        }
        else if (have_snow) {
            if (is.null(cl)) {
                cl <- parallel::makePSOCKcluster(rep("localhost", ncpus))
                if (RNGkind()[1L] == "L'Ecuyer-CMRG")
                    parallel::clusterSetRNGStream(cl, iseed = iseed)
                res <- parallel::parLapply(cl, seq_len(RR), fn)
                parallel::stopCluster(cl)
                res
            }
            else parallel::parLapply(cl, seq_len(RR), fn)
        }
    } else lapply(seq_len(RR), fn)

    error.idx <- integer(0)
    for (b in seq_len(RR)) {
        if (!is.null(res[[b]])) {
            LRT[b] <- res[[b]]
            if (double.bootstrap == "standard") {
                plugin.pvalues[b] <- attr(res[[b]], "plugin.pvalue")
            } else if (double.bootstrap == "FDB") {
                LRT.2[b] <- attr(res[[b]], "LRT.2")
            }
        } else {
            error.idx <- c(error.idx, b)
        }
    }

    #Error handling
    if (length(error.idx) > 0L) {
        #warning("lavaan WARNING: only ", (R - length(error.idx)),
        #        " bootstrap draws were successful")
        LRT <- LRT[-error.idx]
        if(length(LRT) == 0) LRT <- as.numeric(NA)

        if (double.bootstrap == "standard") {
            plugin.pvalues <- plugin.pvalues[-error.idx]
            attr(LRT, "error.idx") <- error.idx

        }
        if (double.bootstrap == "FDB") {
            LRT.2 <- LRT.2[-error.idx]
            attr(LRT.2, "error.idx") <- error.idx
        }
    } else {
        if (verbose)
            cat("Number of successful bootstrap draws:", (R -
                                                          length(error.idx)), "\n")
    }

    pvalue <- sum(LRT > LRT.original) / length(LRT)

    if (return.LRT) {
        attr(pvalue, "LRT.original") <- LRT.original
        attr(pvalue, "LRT") <- LRT
    }
    if (double.bootstrap == "FDB") {
        Q <- (1 - pvalue)
        lrt.q <- quantile(LRT.2, Q, na.rm = TRUE)
        adj.pvalue <- sum(LRT > lrt.q) / length(LRT)
        attr(pvalue, "lrt.q") <- lrt.q
        attr(pvalue, "adj.pvalue") <- adj.pvalue

        if (return.LRT) {
            attr(pvalue, "LRT.original") <- LRT.original
            attr(pvalue, "LRT") <- LRT
            attr(pvalue, "LRT2") <- LRT.2
        }
    } else if (double.bootstrap == "standard") {
        adj.alpha <- quantile(plugin.pvalues, double.bootstrap.alpha,
                              na.rm=TRUE)
        attr(pvalue, "adj.alpha") <- adj.alpha
        adj.pvalue <- sum(plugin.pvalues < pvalue) / length(plugin.pvalues)
        attr(pvalue, "plugin.pvalues") <- plugin.pvalues
        attr(pvalue, "adj.pvalue") <- adj.pvalue
    }

    pvalue
}

Try the lavaan package in your browser

Any scripts or data that you put into this service are public.

lavaan documentation built on July 26, 2023, 5:08 p.m.