R/normalLinearInference.R

#' Inference for Normal Means after Aggregate Testing with a Linear Test
#'
#' @description \code{mvnLinear} is used to estimate a normal means model that was selected based
#' on a single linear aggregate test of the form:
#' \deqn{a'y > u or a'y < l,} 
#' \deqn{l < u.}
#'
#' @param y the observed normal vector.
#'
#' @param sigma the covariance matrix of \code{y}.
#'
#' @param testVec the test testVec \eqn{a} of size \code{length(y)} used in the aggregate test
#'
#' @param threshold the threshold used in the aggregate test, either a vector of size two for the lower and upper
#' thresholds \eqn{u, l}, or a single number.
#'
#' @param pval_threshold the signficance level of the aggregate test.
#' Overrided by \code{threshold} if both are provided.
#' 
#' @param contrasts an optional matrix of contrasts to be tested: must have number of columns 
#' identical to the length of \code{y}. If left as \code{NULL}, the coorinates of \code{y} will be tested by default. 
#' 
#' @param test_direction whether the linear test is one-sided or two-sided. Will be used if the provided threshold 
#' is a scalar, if lower then the tests will be \eqn{a'y < threshold} and if upper then the test will be \eqn{a'y > threshold}.
#'
#' @param estimate_type see \code{\link{mvnQuadratic}} for details.
#'
#' @param pvalue_type see \code{\link{mvnQuadratic}} for details.
#'
#' @param ci_type see \code{\link{mvnQuadratic}} for details.
#'
#' @param confidence_level the confidence level for constructing confidencei intervals.
#'
#' @param verbose whether to report on the progress of the computation.
#' 
#' @param control an object of type \code{\link{psatControl}}.
#'
#' @details The function is used to perform inference for normal mean vectors
#' that were selected based on a single linear aggregate test. To be exact, suppose
#' that \eqn{y ~ N(\mu,\Sigma)} and that we are interested in estimating \eqn{\mu}
#' only if we can determine that \eqn{\mu != 0} using an aggregate test of the form:
#' \eqn{a'y <l} or \eqn{a'y > u} for some predetermined constants \eqn{a, l, u}.
#'
#' The \code{threshold} parameter specifies the constants \eqn{l<u} which are used
#' to threshold the aggregate test. If only a single number is provided, then the threshold
#' will be set according to test_direction: 
#' \itemize{
#' \item lower: a'y < threshold
#' \item upper: a'y > threshold
#' \item two-sided a'y < -threshold, or a'y > threshold
#' }
#' The \code{threshold} parameter takes precedence over \code{pval_threshold} if both
#' are specified. 
#'
#' See \code{\link{mvnQuadratic}} for details regarding the available inference methods.
#'
#' @return An object of class \code{mvnLinear}.
#'
#' @seealso \code{\link{mvnQuadratic}}, \code{\link{psatGLM}}, \code{\link{getCI}},
#' \code{\link{getPval}}.
mvnLinear <- function(y, sigma, testVec,
                      threshold = NULL, pval_threshold = 0.05,
                      contrasts = NULL,
                      test_direction = c("two-sided", "lower", "upper"),
                      estimate_type = c("mle", "naive"),
                      pvalue_type = c("hybrid", "polyhedral", "naive"),
                      ci_type = c("switch", "polyhedral", "naive"),
                      confidence_level = .95,
                      verbose = TRUE,
                      control = psatControl()) {

  # Getting control parameters ------
  switchTune <- control$switchTune
  nullMethod <- control$nullMethod
  nSamples <- control$nSamples
  trueHybrid <- control$trueHybrid
  rbIters <- control$rbIters

  # Checking input ----
  checkPvalues(pvalue_type)
  checkCI(ci_type)

  if(any(length(y) != dim(sigma))) {
    stop("Incorrect dimensions for y or sigma!")
  }

  if(length(y) != length(testVec)) {
    stop("Incorrect dimension for y or testVec!")
  }

  if(confidence_level <= 0 | confidence_level >= 1) {
    stop("confidence_level must be between 0 and 1!")
  }
  confidence_level <- 1 - confidence_level
  
  # Checking contrasts
  if(is.null(contrasts)) {
    contrasts <- diag(length(y))
  } 
  
  if(ncol(contrasts) != length(y) | 
     nrow(contrasts) < 1) {
    stop("Contrasts must be a matrix with length(y) columns! (and at least one row)")
  }

  # Setting threshold ----------------------------
  if(!any(test_direction %in% c("two-sided", "lower", "upper"))) {
     if(missing(threshold)) {
       stop("test_direction must be one of `two-sided', `lower' or `upper'!
         (or a threshold of length 2 must be provided)")
     } else if(length(threshold) < 2) {
       stop("test_direction must be one of `two-sided', `lower' or `upper'!
         (or a threshold of length 2 must be provided)")
     }
  }

  p <- length(y)
  testVecVar <- as.numeric(t(testVec) %*% sigma %*% testVec)
  test_direction <- test_direction[1]
  if(missing(threshold) | is.null(threshold)) {
    if(pthreshold < 0 | pthreshold > 1) {
      stop("If a threshold is not provided, then pval_threshold must be between 0 and 1!")
    } else {
      if(test_direction == "two-sided") {
        threshold <- qnorm(1 - pval_threshold/2, sd = sqrt(testVecVar))
        threshold <- c(-threshold, threshold)
      } else if(test_direction == "lower") {
        threshold <- qnorm(pval_threshold, sd = sqrt(testVecVar))
        threshold <- c(threshold, Inf)
      } else if(test_direction == "upper") {
        threshold <- qnorm(1 - pval_threshold, sd = sqrt(testVecVar))
        threshold <- c(-Inf, threshold)
      } else {
        stop("If threshold is not provided then test_direction must be one of `two-sided'
             `lower' or `upper'!")
      }
    }
  }

  if(length(threshold) > 2) {
    stop("The length of threshold must be either 1 or 2!")
  } else if(length(threshold) == 1) {
    if(test_direction == "two-sided") {
      warning(paste("Threshold of length one provided for a two sided test_direction rule.
                    test_direction rule assumed to be testVec < -abs(threshold) or testVec > abs(threshold)"))
      threshold <- c(-abs(threshold), abs(threshold))
    } else if(test_direction == "lower") {
      threshold <- c(threshold, Inf)
    } else {
      threshold <- c(-Inf, threshold)
    }
  }

  lowerProb <- pnorm(threshold[1], sd = sqrt(testVecVar), lower.tail = TRUE)
  upperProb <- pnorm(threshold[2], sd = sqrt(testVecVar), lower.tail = FALSE)
  pthreshold <- lowerProb + upperProb

  # Validating test_direction --------------
  y <- as.numeric(y)
  testStat <- sum(y * testVec)
  if(testStat > threshold[1] & testStat < threshold[2]) {
    stop("Test statistic did not cross threshold!")
  }

  # Regime switching CI tuning.
  if(is.null(switchTune)) {
    t2 <- confidence_level^2 * pthreshold
  } else {
    t2 <- switchTune * pthreshold
  }

  # Computing MLE -------------------------
  if("mle" %in% estimate_type) {
    mleMu <- computeLinearMLE(y, sigma, testVec, threshold)
  } else {
    mleMu <- NULL
  }

  if(estimate_type[1] == "mle") {
    muhat <- mleMu
  }

  # Sampling from the global-null ---------------------
  if(is.null(nSamples)) {
    nSamples <- round(length(y) * 5 / confidence_level)
    nSamples <- max(nSamples, 100 * length(y))
  }

  if(any(c("global-null", "hybrid") %in% pvalue_type)) {
    nullMu <- rep(0, length(y))
    nullSample <- sampleLinearTest(nSamples, mu = rep(0, p), sigma, testVec, threshold)
    nullPval <- numeric(nrow(contrasts))
    for(i in 1:nrow(contrasts)) {
      cont <- sum(contrasts[i, ] * y)
      contsamp <- as.numeric(nullSample %*% contrasts[i, ])
      nullPval[i] <- 2 * min(mean(cont < contsamp), mean(cont > contsamp))
    }
    if(pvalue_type[1] == "global-null") {
      pvalue <- nullPval
    }
  } else {
    nullSample <- NULL
    nullPval <- NULL
    nullDist <- NULL
  }

  # Computing polyhedral p-values/CIs ---------------------
  if(any(c("polyhedral", "hybrid") %in% pvalue_type) | "polyhedral" %in% ci_type) {
    if(verbose) print("Computing polyhedral p-values/CIs!")
    polyResult <- suppressWarnings(getPolyCI(y, sigma, testVec, threshold, confidence_level,
                                             test = "linear", contrasts = contrasts))
    polyPval <- polyResult$pval
    polyCI <- polyResult$ci
    if(pvalue_type[1] == "polyhedral") pvalue <- polyPval
    if(ci_type[1] == "polyhedral") ci <- polyCI
  } else {
    polyPval <- NULL
    polyCI <- NULL
  }

  # Computing hybrid p-value -----------------
  if("hybrid" %in% pvalue_type) {
    hybridPval <- pmin(2 * pmin(polyPval, nullPval), 1)
  } else {
    hybridPval <- NULL
  }

  if(pvalue_type[1] == "hybrid") {
    pvalue <- hybridPval
  }
  
  # Null Distribution Based CIs ---------------
  if(("global-null" %in% ci_type)) {
    nullCI <- linearRB(y, sigma, testVec, threshold,
                       confidence_level, rbIters = rbIters,
                       variables = contrasts, computeFull = trueHybrid)
  } else {
    nullCI <- NULL
  }

  if(ci_type[1] == "global_null") {
    ci <- nullCI
  }

  # Naive pvalues and CIs -----------------------
  naiveCI <- getNaiveCI(y, sigma, confidence_level, contrasts, TRUE)
  naivePval <- naiveCI$pval
  naiveCI <- naiveCI$ci
  if(ci_type[1] == "naive") {
    ci <- naiveCI
  }
  if(pvalue_type[1] == "naive") {
    pvalue <- naivePval
  }

  # Regime switching CIs -------------------------
  if("switch" %in% ci_type) {
    if(verbose) print("Computing Switching Regime CIs!")
    switchCI <- getSwitchCI(y, sigma, testVec, threshold, pthreshold,
                            confidence_level, contrasts,
                            quadlam,
                            confidence_level^2, testStat,
                            hybridPval, trueHybrid, rbIters,
                            test = "linear")
  } else {
    switchCI <- NULL
  }

  if(ci_type[1] == "switch") {
    ci <- switchCI
  }

  # Computing hybrid CIs ---------------------
  if("hybrid" %in% ci_type) {
    if(verbose) print("Computing Hybrid CIs!")
    hybridCI <- getHybridCI(y, sigma, testVec, threshold, pthreshold, confidence_level,
                            contrasts = contrasts,
                            hybridPval = hybridPval, computeFull = trueHybrid, 
                            rbIters = rbIters,
                            test = "linear")
  } else {
    hybridCI <- NULL
  }

  if(ci_type[1] == "hybrid") {
    ci <- hybridCI
  }

  # Wrapping up ------------------
  results <- list()
  results$muhat <- muhat
  results$ci <- ci
  results$pvalue <- pvalue
  results$contrasts <- contrasts

  results$y <- y
  results$naiveContrast <- as.numeric(contrasts %*% y)
  results$mleMu <- mleMu
  results$mleContrast <- as.numeric(contrasts %*% mleMu)
  results$nullSample <- nullSample
  results$nullPval <- pmax(nullPval, naivePval)
  results$polyPval <- polyPval
  results$polyCI <- polyCI
  results$nullCI <- nullCI
  results$naivePval <- naivePval
  results$naiveCI <- naiveCI
  results$switchCI <- switchCI
  results$hybridPval <- hybridPval
  results$hybridCI <- hybridCI

  results$testVec <- testVec
  results$sigma <- sigma
  results$pthreshold <- pthreshold
  results$threshold <- threshold
  results$confidence_level <- 1 - confidence_level
  results$switchTune <- t2
  results$estimate_type <- estimate_type
  results$pvalue_type <- pvalue_type
  results$ci_type <- ci_type
  results$testStat <- testStat
  results$trueHybrid <- trueHybrid
  results$rbIters <- rbIters
  results$testType <- "linear"

  class(results) <- "mvnLinear"

  return(results)
}

computeLinearMLE <- function(y, sigma, testVec, threshold) {
  naive <- sum(y * testVec)
  cSig <- as.numeric(t(testVec) %*% sigma)
  cSig <- cSig / as.numeric(cSig %*% testVec)
  interval <- c(-abs(naive), abs(naive))
  interval <- sort(interval)
  sd <- as.numeric(sqrt(t(testVec) %*% sigma %*% testVec))

  maximum <- optimize(f = computeLinearDens, interval = interval,
                      maximum = TRUE,
                      naive, y, cSig, sd, threshold, solve(sigma))$maximum
  return(y + (maximum - naive) * cSig)
}

computeLinearDens <- function(m, naive, y, cSig, sd, threshold, precision) {
  diff <- (m - naive) * cSig
  density <- -0.5 * t(diff) %*% precision %*% diff
  prob <- pnorm(threshold[1], sd = sd, mean = m) +
    pnorm(threshold[2], sd = sd, mean = m, lower.tail = FALSE)

  condDens <- density - prob
  return(condDens)
}
ammeir2/PSAT documentation built on May 27, 2019, 7:40 a.m.