R/PBsampler.R

#' @title Parametric bootstrap sampler for lasso, group lasso, scaled lasso or scaled group lasso estimator
#'
#' @description Draw gaussian bootstrap or wild multiplier bootstrap samples for
#' lasso, group lasso, scaled lasso and scaled group lasso estimators along with their subgradients.
#'
#' @param X predictor matrix.
#' @param PE_1,sig2_1,lbd_1 parameters of target distribution.
#' (point estimate of beta or \code{E(y)} depends on \code{PEtype}, variance estimate of error and lambda)
#' sig2_1 is only needed when \code{Btype = "wild"}.
#' @param PE_2,sig2_2,lbd_2 additional parameters of target distribution. This is required
#' only if mixture distribution is used. sig2_2 is only needed when \code{Btype = "wild"}.
#' @param group \code{p} x \code{1} vector of consecutive integers describing the group structure.
#' The number of groups should be the same as max(group). Default is \code{group = 1:p}
#' , where \code{p} is number of covariates. See examples for a guideline.
#' @param weights weight vector with length equal to the number of groups. Default is
#' \code{rep(1, max(group))}.
#' @param niter integer. The number of iterations. Default is \code{niter = 2000}
#' @param type type of penalty. Must be specified to be one of the following:
#'  \code{"lasso", "grlasso", "slasso"} or \code{"sgrlasso"}.
#' @param PEtype Type of \code{PE} which is needed to characterize the target distribution.
#' Users can choose either \code{"coeff"} or \code{"mu"}.
#' @param Btype Type of bootstrap method. Users can choose either \code{"gaussian"}
#' for gaussian bootstrap or \code{"wild"} for wild multiplier bootstrap. Default
#' is \code{"gaussian"}.
#' @param Y response vector. This is only required when \code{Btype = "wild"}.
#' @param parallel logical. If \code{parallel = TRUE}, uses parallelization.
#' Default is \code{parallel = FALSE}.
#' @param ncores integer. The number of cores to use for parallelization.
#' @param verbose logical. This works only when
#'  \code{parallel = FALSE}.
#'
#' @details This function provides bootstrap samples for lasso, group lasso,
#' scaled lasso or scaled group lasso estimator
#' and its subgradient. \cr
#' The sampling distribution is characterized by \code{(PE, sig2, lbd)}.
#' If \code{Btype = "gaussian"}, \code{error_new} is generated from \code{N(0, sig2)}.
#' If \code{Btype = "wild"}, we first generate \code{error_new} from \code{N(0, 1)}
#'  and multiply with the residuals.
#' Then, if \code{PEtype = "coeff"}, \code{y_new} is generated by \code{X * PE + error_new}
#' and if \code{PEtype = "mu"}, \code{y_new} is generated by \code{PE + error_new}. \cr
#' By providing \code{(PE_2, sig2_2, lbd_2)}, this function simulates from a mixture distribution.
#' With 1/2 probability, samples will be drawn from the distribution with parameters
#' (PE_1, sig2_1, lbd_1) and with another 1/2 probability, they will be drawn from
#' the distribution with parameters (PE_2, sig2_2, lbd_2).
#' Four distinct penalties can be used; \code{"lasso"} for lasso, \code{"grlasso"} for group lasso,
#' \code{"slasso"} for scaled lasso and \code{"sgrlasso"} for scaled group lasso.
#' See Zhou(2014) and Zhou and Min(2017) for details.
#'
#' @references
#' Zhou, Q. (2014), "Monte Carlo simulation for Lasso-type problems by estimator augmentation,"
#' Journal of the American Statistical Association, 109, 1495-1516.
#'
#' Zhou, Q. and Min, S. (2017), "Estimator augmentation with applications in
#' high-dimensional group inference," Electronic Journal of Statistics, 11(2), 3039-3080.
#'
#' @return \item{beta}{coefficient estimate.}
#' @return \item{subgrad}{subgradient.}
#' @return \item{hsigma}{standard deviation estimator, for type="slasso" or type="sgrlasso" only.}
#' @return \item{X, PE, sig2, weights, group, type, PEtype, Btype, Y, mixture}{model parameters.}
#' @examples
#' set.seed(1234)
#' n <- 10
#' p <- 30
#' Niter <-  10
#' Group <- rep(1:(p/10), each = 10)
#' Weights <- rep(1, p/10)
#' x <- matrix(rnorm(n*p), n)
#' #
#' # Using non-mixture distribution
#' #
#' PBsampler(X = x, PE_1 = rep(0, p), sig2_1 = 1, lbd_1 = .5,
#'  weights = Weights, group = Group, type = "grlasso", niter = Niter, parallel = FALSE)
#' PBsampler(X = x, PE_1 = rep(0, p), sig2_1 = 1, lbd_1 = .5,
#'  weights = Weights, group = Group, type = "grlasso", niter = Niter, parallel = TRUE)
#' #
#' # Using mixture distribution
#' #
#' PBsampler(X = x, PE_1 = rep(0, p), sig2_1 = 1, lbd_1 = .5,
#'  PE_2 = rep(1, p), sig2_2 = 2, lbd_2 = .3, weights = Weights,
#'  group = Group, type = "grlasso", niter = Niter, parallel = TRUE)
#' @export
PBsampler <- function(X, PE_1, sig2_1, lbd_1, PE_2,
  sig2_2, lbd_2, weights = rep(1, max(group)), group = 1:ncol(X), niter = 2000,
  type, PEtype = "coeff", Btype="gaussian", Y = NULL, parallel = FALSE, ncores = 2L,
  verbose = FALSE)
{
  n <- nrow(X)
  p <- ncol(X)
  Y <- c(Y)

  #--------------------
  # Error Handling
  #--------------------
  # if (Btype == "wild") {
  #   sig2_1 <- NULL
  #   if (!missing(lbd_2) && !mising(PE_2)) {
  #     sig2_2 <- NULL
  #   }
  # }
  if (!is.null(Y) & length(Y) != n) {
    stop("dimension of X and Y are not conformable.")
  }
  if (!type %in% c("lasso", "grlasso", "slasso", "sgrlasso")) {
    stop("type has to be either lasso, grlasso, slasso or sgrlasso.")
  }
  if (!PEtype %in% c("coeff", "mu")) {
    stop("Invalide PEtype.")
  }
  if (!Btype %in% c("gaussian", "wild")) {
    stop("Invalide Btype.")
  }
  if (length(group) != p) {
    stop("group must have a same length with the number of X columns")
  }
  if (!all(group==1:p) && (!type %in% c("grlasso", "sgrlasso"))) {
    stop("Choose type to be either grlasso or sgrlasso if group-structure exists.")
  }
  # if (all(group==1:p) && (!type %in% c("lasso", "slasso"))) {
  #   stop("Choose type to be either lasso or slasso if group-structure does not exist.")
  # }

  parallelTemp <- ErrorParallel(parallel,ncores)
  parallel <- parallelTemp$parallel
  ncores <- parallelTemp$ncores
  if (length(weights) != length(unique(group))) {
    stop("weights has to have a same length as the number of groups")
  }
  if (any(weights <= 0)) {
    stop("weights should be positive.")
  }
  if (any(!1:max(group) %in% group)) {
    stop("group index has to be a consecutive integer starting from 1.")
  }
  if (any(missing(PE_1), missing(sig2_1), missing(lbd_1))) {
    stop("provide all the parameters for the distribution.")
  }
  if (!sum(c(missing(sig2_2), missing(lbd_2), missing(PE_2)))
      %in% c(0, 3)) {
    stop("provide all the parameters for the mixture distribution.")
  }
  #--------------------
  # Main Step
  #--------------------
  Mixture <- !missing(PE_2)

  if (Mixture) {
    niter1 <- rbinom(n = 1, size = niter, prob = 1/2)
    niter2 <- niter-niter1

    PB1 <- PBsamplerMain(X = X, PE = PE_1, sig2 = sig2_1,
                         lbd = lbd_1, weights = weights, group = group, niter = niter1,
                         type = type, PEtype = PEtype, Btype = Btype, Y = Y, parallel = parallel,
                         ncores = ncores, verbose = verbose)
    PB2 <- PBsamplerMain(X = X, PE = PE_2, sig2 = sig2_2,
                         lbd = lbd_2, weights = weights, group = group, niter = niter2,
                         type = type, PEtype = PEtype, Btype = Btype, Y = Y, parallel = parallel,
                         ncores = ncores, verbose = verbose)
    if (type %in% c("lasso", "grlasso")) {
      RESULT <- list(beta = rbind(PB1$beta, PB2$beta),
                     subgrad = rbind(PB1$subgrad, PB2$subgrad), X = X,
                     PE = rbind(PE_1, PE_2),
                     sig2 = c(sig2_1, sig2_2), lbd = c(lbd_1, lbd_2), weights = weights, group = group,
                     type = type, PEtype = PEtype, Btype = Btype, Y = Y, mixture = Mixture)
    } else {
      RESULT <- list(beta = rbind(PB1$beta, PB2$beta),
                     subgrad = rbind(PB1$subgrad, PB2$subgrad), hsigma = c(PB1$hsigma, PB2$hsigma), X = X,
                     PE = rbind(PE_1, PE_2),
                     sig2 = c(sig2_1, sig2_2), lbd = c(lbd_1, lbd_2), weights = weights, group = group,
                     type = type, PEtype = PEtype, Btype = Btype, Y = Y, mixture = Mixture)
    }
  } else {
    PB <- PBsamplerMain(X = X, PE = PE_1,
      sig2 = sig2_1, lbd = lbd_1, weights = weights, group = group,
      niter = niter, type = type, PEtype = PEtype, Btype = Btype, Y = Y, parallel = parallel,
      ncores = ncores, verbose = verbose)
    if (type %in% c("lasso", "grlasso")) {
      RESULT <- list(beta = PB$beta, subgrad = PB$subgrad, X = X,
                     PE = PE_1, sig2 = sig2_1, lbd = lbd_1, weights = weights, group = group,
                     type = type, PEtype = PEtype, Btype = Btype, Y = Y, mixture = Mixture)
    } else {
      RESULT <- list(beta = PB$beta, subgrad = PB$subgrad, hsigma = PB$hsigma, X = X,
                     PE = PE_1, sig2 = sig2_1, lbd = lbd_1, weights = weights, group = group,
                     type = type, PEtype = PEtype, Btype = Btype, Y = Y, mixture = Mixture)
    }
  }
  RESULT$call <- match.call()
  class(RESULT) <- "PB"
  return(RESULT)
}

PBsamplerMain <- function(X, PE, sig2, lbd, weights,
 group, niter, type, PEtype, Btype, Y, parallel,
 ncores, verbose)
{
  n <- nrow(X);
  p <- ncol(X);

  # Error Handling
  if (length(sig2) !=1 || length(lbd) != 1) {
    stop("sig2/lbd should be a scalar.")
  }
  if (sig2 <= 0) {
    stop("sig2 should be positive.")
  }
  if (lbd < 0) {
    stop("lbd should be non-negative.")
  }
  if (parallel && verbose) {
    warning("Note that verbose only works when parallel=FALSE")
  }
  if (PEtype == "coeff" && length(PE) != p) {
    stop("PE must have a same length with the col-number of X, if PEtype = \"coeff\"")
  }
  if (PEtype == "mu" && length(PE) != n) {
    stop("PE must have a same length with the row-number of X, if PEtype = \"mu\"")
  }
  if(verbose) {
    niter.seq <- round(seq(1, niter, length.out = 11))[-1]
  }

  W <- rep(weights, table(group))

  Xtilde   <- scale(X, FALSE, scale = W)
  t.Xtilde <- t(Xtilde) # same as  t(X) / W
  GramMat   <- t(Xtilde) %*% Xtilde
  Lassobeta <- matrix(0, niter, p)
  Subgrad   <- matrix(0, niter, p)
  Ysample   <- numeric(n)
  if (PEtype == "coeff") {
    Yexpect <- X %*% PE
  } else {
    Yexpect <- PE
  }
  if (Btype == "wild") {
    res <- Y - Yexpect
    res <- res - mean(res) # centering the residuals
  }

  GAMMA <- groupMaxEigen(X = Xtilde, group = group)

  if (type %in% c("lasso", "grlasso")) {
    FF <- function(x) {
      if (Btype == "wild") {
        epsilon <- rnorm(n, mean = 0, sd = sig2^0.5)
        epsilon <- epsilon * res
      } else {
        epsilon <- rnorm(n, mean = 0, sd = sig2^0.5)
      }
      Ysample <- Yexpect + epsilon;
      #if(center){Ysample=Ysample-mean(Ysample);}

      # LassoFit <- gglasso(Xtilde, Ysample, pf = rep(1, max(group)),
      #                     group = group, loss = "ls", intercept = FALSE, lambda = lbd)
      # Lassobeta <- coef(LassoFit)[-1] / W
      LassoFit <- grlassoFit(X = Xtilde, Y = Ysample, group = group,
                             weights = rep(1, max(group)), Gamma = GAMMA, lbd = lbd)
      Lassobeta <- LassoFit$coef / W
      return(c(Lassobeta, (t.Xtilde %*% Ysample -
                             GramMat %*% (Lassobeta * W)) / n / lbd))
    }
  } else {
    FF <- function(x) {
      if (Btype == "wild") {
        epsilon <- rnorm(n, mean = 0, sd = sig2^0.5)
        epsilon <- epsilon * res
      } else {
        epsilon <- rnorm(n, mean = 0, sd = sig2^0.5)
      }
      Ysample <- Yexpect + epsilon;

      Fit <- slassoFit.tilde(Xtilde = Xtilde, Y = Ysample, lbd = lbd, group = group, weights = weights, Gamma = GAMMA, verbose = verbose)
      return(c(Fit$B0, Fit$S0, Fit$hsigma))
    }

    # FF <- function(x) {
    #   if (Btype == "wild") {
    #     epsilon <- rnorm(n, mean = 0, sd = 1)
    #     epsilon <- epsilon * res
    #   } else {
    #     epsilon <- rnorm(n, mean = 0, sd = sig2^0.5)
    #   }
    #   Ysample <- Yexpect + epsilon;
    #   #if(center){Ysample=Ysample-mean(Ysample);}
    #
    #   sig <- signew <- .5
    #   K <- 1 ; niter <- 0
    #   while(K == 1 & niter < 1000){
    #     sig <- signew;
    #     lam <- lbd * sig
    #     # B0 <- coef(gglasso(Xtilde,Ysample,loss="ls",group=group,pf=rep(1,max(group)),lambda=lam,intercept = FALSE))[-1]
    #     B0 <- grlassoFit(X = Xtilde, Y = Ysample, group = group,
    #                      weights = rep(1, max(group)), Gamma = Gamma, lbd = lbd)$coef
    #     signew <- sqrt(crossprod(Ysample-Xtilde %*% B0) / n)
    #
    #     niter <- niter + 1
    #     if (abs(signew - sig) < 1e-04) {K <- 0}
    #     if (verbose) {
    #       cat(niter, "\t", sprintf("%.3f", slassoLoss(Xtilde,Ysample,B0,signew,lbd)),"\t",
    #           sprintf("%.3f", signew), "\n")
    #     }
    #   }
    #   hsigma <- c(signew)
    #   S0 <- (t.Xtilde %*% (Ysample - Xtilde %*% B0)) / n / lbd / hsigma
    #   B0 <- B0 / rep(weights,table(group))
    #   return(c(B0, S0, hsigma))
    # }
  }

  if (parallel == FALSE) {
    TEMP      <- lapply(1:niter, FF)
    TEMP      <- do.call(rbind, TEMP)
  } else {
    TEMP <- parallel::mclapply(1:niter, FF, mc.cores = ncores)
    TEMP <- do.call(rbind, TEMP)
  }
  Lassobeta <- TEMP[, 1:p]
  Subgrad   <- TEMP[, 1:p + p]
  Result <- list(beta = TEMP[, 1:p], subgrad = TEMP[, 1:p + p])
  if (type %in% c("slasso", "sgrlasso")) {
    Result$hsigma <- TEMP[, 2*p + 1]
  }
  return(Result);
}

#' @title Provide \code{(1-alpha)\%} confidence interval of each coefficients
#'
#' @description Using samples drawn by \code{\link{PBsampler}}, computes
#' \code{(1-alpha)\%} confidence interval of each coefficient.
#'
#' @param object bootstrap samples of class \code{PB} from \code{\link{PBsampler}}
#' @param alpha significance level.
#' @param method bias-correction method. Either to be "none" or "debias".
#' @param parallel logical. If \code{TRUE}, use parallelization. Default is \code{FALSE}.
#' @param ncores integer. The number of cores to use for parallelization.
#'
#' @details If \code{method = "none"}, \code{\link{PB.CI}} simply compute
#' the two-sided \code{(1-alpha)} quantile of the sampled coefficients.
#' If \code{method = "debias"}, we use
#' debiased estimator to compute confidence interval.
#'
#' @return \code{(1-alpha)\%} confidence interval of each coefficients
#'
#' @references
#' Zhang, C., Zhang, S. (2014), "Confidence intervals for low dimensional
#' parameters in high dimensional linear models," Journal of the Royal
#' Statistical Society: Series B, 76, 217–242.
#'
#' Dezeure, R., Buhlmann, P., Meier, L. and Meinshausen, N. (2015),
#' "High-Dimensional Inference: Confidence Intervals, p-values and R-Software hdi,"
#' Statistical Science, 30(4), 533-558
#'
#' @examples
#' set.seed(1234)
#' n <- 40
#' p <- 50
#' Niter <-  10
#' X <- matrix(rnorm(n*p), n)
#' object <- PBsampler(X = X, PE_1 = c(1,1,rep(0,p-2)), sig2_1 = 1, lbd_1 = .5,
#' niter = 100, type = "lasso")
#' parallel <- (.Platform$OS.type != "windows")
#' PB.CI(object = object, alpha = .05, method = "none")
#'
#' @export
PB.CI <- function(object, alpha = .05, method = "debias", parallel=FALSE, ncores=2L) {

  if (class(object)!="PB") {
    stop("object class has to be \"PB\".")
  }
  parallelTemp <- ErrorParallel(parallel,ncores)
  parallel <- parallelTemp$parallel
  ncores <- parallelTemp$ncores

  X <- object$X
  n <- nrow(X)
  p <- ncol(X)
  weights <- object$weights
  lbd <- object$lbd
  group <- object$group
  type <- object$type

  if (!method %in% c("none", "debias")) {
    stop("method should be either \"default\" or \"debias\".")
  }
  if (method == "none") {
    Result <- apply(object$beta,2,quantile,prob=c(alpha/2,1-alpha/2))
  } else { # debiased estimator
    B <- object$beta # niter x p matrix
    S <- object$subgrad # niter x p matrix
    W <- rep(weights, table(group))

    refitY <- solve(X%*%t(X))%*%X %*% (crossprod(X) %*% t(B) / n +
                                       lbd * W * t(S)) * n # n x niter matrix

    hdiFit <- hdi::lasso.proj(x = X, y = X %*% rep(1,p) + rnorm(n),
                              standardize = FALSE, parallel = parallel,
                              ncores = ncores, return.Z = TRUE)
    Z <- hdiFit$Z

    if (type %in% c("lasso", "slasso")) {
      ZrefitY <- crossprod(Z, refitY) # p x niter
      ZX <- colSums(Z * X) # length p
      ZXcomp <- matrix(0,p-1,p)
      for (i in 1:p) {
        ZXcomp[,i] <- crossprod(Z[,i], X[, -i])
      }
    } else {
      J <- max(group)
      inv.ZX.X <- vector("list", J) # each with size n x p_j
      for (i in 1:J) {
        inv.ZX.X[[i]] <- Z[, group==i] %*% MASS::ginv(t(Z[,group==i])%*%X[,group==i])
      }

      tinv.ZX.X.refitY <- vector("list", J) # each with size p_j x niter
      for (i in 1:J) {
        tinv.ZX.X.refitY[[i]] <- crossprod(inv.ZX.X[[i]], refitY)
      }

      ZXcomp.group <- vector("list", J) # each w/ size p_j x (n - p_j)
      for (i in 1:J) {
        ZXcomp.group[[i]] <- crossprod(inv.ZX.X[[i]], X[, group!=i])
      }

      ZXcomp.B <- vector("list", J) # each w/ size p_j x niter
      for (i in 1:J) {
        ZXcomp.B[[i]] <- tcrossprod(ZXcomp.group[[i]], B[, group!=i])
      }
    }

    refitB <- matrix(0,nrow(B),ncol(B))

    # eq(4) from Zhang & Zhang(2014)
    FF <- function(x) {
      if (type %in% c("lasso", "slasso")) {
        TEMP <- c()
        for (j in 1:ncol(B)) {
          TEMP[j] <- (ZrefitY[j,x] - ZXcomp[,j] %*% B[x,-j]) / ZX[j]
        }
      } else { # eq (2.12) from Mitra & Zhang(2016) with little modification
        TEMP <- c()
        for (j in 1:J) {
          TEMP[group==j] <- tinv.ZX.X.refitY[[j]][,x] - ZXcomp.B[[j]][,x]
        }
      }
      return(TEMP)
    }

    if (parallel) {
      refitB <- parallel::mcmapply(FF, 1:nrow(B), mc.cores = ncores)
    } else {
      refitB <- mapply(FF, 1:nrow(B))
    }

    Result <- apply(refitB,1,quantile,prob=c(alpha/2,1-alpha/2))
  }
  colnames(Result) <- paste("beta", 1:ncol(object$X), sep = "")
  #class(Result) <- "PB.CI"
  return(Result)
}
# HDI$bhat[j] == HDI$betahat[j] + t(Z[,j])%*%(Y-X%*%HDI$betahat) / crossprod(Z[,j],X[,j])

Try the EAinference package in your browser

Any scripts or data that you put into this service are public.

EAinference documentation built on May 2, 2019, 3:36 p.m.