R/boot_pvl.R
In qtl2pleio: Testing Pleiotropy in Multiparental Populations

Documented in boot_pvl

#' Perform bootstrap sampling and calculate test statistic for each bootstrap sample
#'
#' Create a bootstrap sample, perform multivariate QTL scan, and calculate log10 LRT statistic
#'
#' Performs a parametric bootstrap method to calibrate test statistic values in the test of
#' pleiotropy vs. separate QTL. It begins by inferring parameter values at
#' the `pleio_peak_index` index value in the object `probs`. It then uses
#' these inferred parameter values in sampling from a multivariate normal
#' distribution. For each of the `nboot` sampled phenotype vectors, a two-dimensional QTL
#' scan, starting at the marker indexed by `start_snp` within the object
#' `probs` and extending for a total of `n_snp` consecutive markers. The
#' two-dimensional scan is performed via the function `scan_pvl_clean`. For each
#' two-dimensional scan, a log10 likelihood ratio test statistic is calculated. The
#' outputted object is a vector of `nboot` log10 likelihood ratio test
#' statistics from `nboot` distinct bootstrap samples.
#'
#' @param probs founder allele probabilities three-dimensional array for one chromosome only (not a list)
#' @param pheno n by d matrix of phenotypes
#' @param addcovar n by c matrix of additive numeric covariates
#' @param kinship a kinship matrix, not a list
#' @param start_snp positive integer indicating index within probs for start of scan
#' @param n_snp number of (consecutive) markers to use in scan
#' @param pleio_peak_index positive integer index indicating genotype matrix for bootstrap sampling. Typically acquired by using `find_pleio_peak_tib`.
#' @param nboot number of bootstrap samples to acquire and scan
#' @param max_iter maximum number of iterations for EM algorithm
#' @param max_prec stepwise precision for EM algorithm. EM stops once incremental difference in log likelihood is less than max_prec
#' @param cores number of cores to use when calling mclapply to parallelize the bootstrap analysis.
#' @export
#' @importFrom stats var
#' @references Knott SA, Haley CS (2000) Multitrait
#' least squares for quantitative trait loci detection.
#' Genetics 156: 899–911.
#'
#' Walling GA, Visscher PM, Haley CS (1998) A comparison of
#' bootstrap methods to construct confidence intervals in QTL mapping.
#' Genet. Res. 71: 171–180.
#' @examples
#'
#' n <- 50
#' pheno <- matrix(rnorm(2 * n), ncol = 2)
#' rownames(pheno) <- paste0("s", 1:n)
#' colnames(pheno) <- paste0("tr", 1:2)
#' probs <- array(dim = c(n, 2, 5))
#' probs[ , 1, ] <- rbinom(n * 5, size = 1, prob = 0.2)
#' probs[ , 2, ] <- 1 - probs[ , 1, ]
#' rownames(probs) <- paste0("s", 1:n)
#' colnames(probs) <- LETTERS[1:2]
#' dimnames(probs)[[3]] <- paste0("m", 1:5)
#' boot_pvl(probs = probs, pheno = pheno,
#'         start_snp = 1, n_snp = 5, pleio_peak_index = 3, nboot = 1, cores = 1)
#'
#'
#' @return numeric vector of (log) likelihood ratio test statistics from `nboot_per_job` bootstrap samples
#'
boot_pvl <- function(probs,
                     pheno,
                     addcovar = NULL,
                     kinship = NULL,
                     start_snp = 1,
                     n_snp,
                     pleio_peak_index,
                     nboot = 1,
                     max_iter = 1e+04,
                     max_prec = 1 / 1e+08,
                     cores = 1
                     )
    {

    inputs <- process_inputs(probs = probs,
                             pheno = pheno,
                             addcovar = addcovar,
                             kinship = kinship,
                             max_iter = max_iter,
                             max_prec = max_prec)
    ## define X1 - a single marker's allele probabilities
    X1 <- inputs$probs[ , , pleio_peak_index]
    if (!is.null(inputs$addcovar)) {
        Xpre <- cbind(X1, inputs$addcovar)
    } else {
        Xpre <- X1
    }
    d_size <- ncol(inputs$pheno)
    Xlist <- vector(length = d_size)
    Xlist <- lapply(Xlist, FUN = function(x){x <- Xpre; return(x)})
    X <- gemma2::stagger_mats(Xlist)

        Bcol <- rcpp_calc_Bhat2(X = X,
                         Sigma_inv = inputs$Sigma_inv,
                         Y = as.vector(as.matrix(inputs$pheno))
    )
    B <- matrix(data = Bcol, nrow = ncol(Xpre), ncol = d_size, byrow = FALSE)
    # Start loop to get Ysim matrices
    Ysimlist <- list()
    for (i in 1:nboot) {
        foo <- sim1(X = X, B = B, Sigma = inputs$Sigma)
        Ysim <- matrix(foo, ncol = d_size, byrow = FALSE)
        rownames(Ysim) <- rownames(inputs$pheno)
        colnames(Ysim) <- paste0("t", 1:d_size)
        Ysimlist[[i]] <- Ysim
    }
    # prepare table of marker indices for each call of scan_pvl_clean
    mytab <- prep_mytab(d_size = d_size, n_snp = n_snp)

    scan_out <- parallel::mclapply(X = Ysimlist,
                                   FUN = scan_pvl_clean,
                                   mc.cores = cores,
                                   probs = inputs$probs,
                                   addcovar = inputs$addcovar,
                                   Sigma_inv = inputs$Sigma_inv,
                                   Sigma = inputs$Sigma,
                                   start_snp = start_snp,
                                   mytab = mytab,
                                   n_snp = n_snp
                                   )
    lrt <- parallel::mclapply(X = scan_out, FUN = function(x){
        x %>%
            calc_profile_lods() %>%
            dplyr::select(profile_lod) %>%
            max()
    })
    return(unlist(lrt))
}