R/fastPermFunctions.R

Defines functions PiLgamma diffMean diffMeanStudent diffMedian ratioMean ratioMedian fastPerm print.fastPerm

Documented in diffMean diffMeanStudent diffMedian fastPerm PiLgamma print.fastPerm ratioMean ratioMedian

# pmf of partitions, and
# proposed algorithm

# pmf of partitions ---------------------------------------
PiLgamma <- function(nx, ny){
  #' Exact pmf of partitions with the log gamma function
  #'
  #' This function calculates the exact probability mass function
  #' of the partitions using the lgamma function.
  #' Typically, this function is called from fastPerm.
  #' @param nx Size of first sample
  #' @param ny Size of second sample
  #' @keywords partition pmf
  #' @export
  #' @examples
  #' PiLgamma(nx, ny)
  
  nMin <- min(nx, ny)
  out <- rep(NA, nMin + 1)
  names(out) <- 0:min(nx, ny)
  
  for (m in 0:nMin){
    out[m + 1] <- exp(
    	lgamma(nx + 1) + lgamma(ny + 1) + lgamma(nx + ny - nMin + 1) +
    	lgamma(nMin + 1) - lgamma(nx - m + 1) - lgamma(ny - m + 1) -
    	lgamma(nx + ny +1) - 2*lgamma(m+1)
    	)
  }
  
  return(out)
}
# test statistics, used as input to fastPerm ---------------

diffMean <- function(x, y){
  #' Utility function for fastPerm and SAMC
  #'
  #' Difference in means test statistic used by fastPerm
  #' @param x First sample 
  #' @param y Second sample
  #' @keywords test statistic
  #' @export
  #' @examples
  #' diffMean(x, y)
  
  abs(mean(x) - mean(y))
}
attributes(diffMean) <- list(summary="mean", comparison="difference")

diffMeanStudent <- function(x, y){
  #' Utility function for fastPerm and SAMC
  #'
  #' Difference in means test statistic used by fastPerm
  #' @param x First sample 
  #' @param y Second sample
  #' @keywords test statistic
  #' @export
  #' @examples
  #' diffMeanStudent(x, y)
  nx <- length(x)
  ny <- length(y)
  abs(mean(x) - mean(y)) / sqrt(var(x) / nx + var(y) / ny)
}
attributes(diffMeanStudent) <- list(summary="mean", comparison="studentized difference")


diffMedian <- function(x, y){
  #' Utility function for fastPerm and SAMC
  #'
  #' Difference in medians test statistic used by fastPerm.
  #' Note: In this version of fastPerm, median statistic may not be reliable.
  #' @param x First sample 
  #' @param y Second sample
  #' @keywords test statistic
  #' @export
  #' @examples
  #' diffMedian(x, y)
  
  abs(median(x) - median(y))
}
attributes(diffMedian) <- list(summary="median", comparison="difference")

ratioMean <- function(x, y){
  #' Utility function for fastPerm and SAMC
  #'
  #' Ratio of means test statistic used by fastPerm
  #' @param x First sample 
  #' @param y Second sample
  #' @keywords test statistic
  #' @export
  #' @examples
  #' ratioMean(x, y)
  
  max(mean(x) / mean(y), mean(y) / mean(x))
}
attributes(ratioMean) <- list(summary="mean", comparison="ratio")

ratioMedian <- function(x, y){
  #' Utility function for fastPerm and SAMC
  #'
  #' Ratio of medians test statistic used by fastPerm.
  #' Note: In this version of fastPerm, median statistic may not be reliable.
  #' @param x First sample 
  #' @param y Second sample
  #' @keywords test statistic
  #' @export
  #' @examples
  #' ratioMedian(x, y)
  
  max(median(x) / median(y), median(y) / median(x))
}
attributes(ratioMedian) <- list(summary="median", comparison="ratio")

# proposed algorithm --------------------------------------
fastPerm <- function(x, y, testStat = ratioMean, B=1000, adjusted=FALSE){
#' Fast approximation of small permutation p-values
#'
#' This function approximates the p-value (two-sided) for a two
#' sample permutation test by partitioning the permutation space.
#' This function is most useful for small p-values, e.g. p < 10^-6.
#' @param x First sample
#' @param y Second sample
#' @param testStat Test statistic, defaults to the ratio of the
#' means (ratioMean). Other choices are diffMean, ratioMedian, and
#' diffMedian. In the current version, the median statistics are experimental and may
#' not be reliable.
#' @param B Number of Monte Carlo iterations within each partition. Defaults to 1,000.
#' @keywords fast permtution test two sample
#' @export
#' @examples
#' x <- rexp(100, 5)
#' y <- rexp(100, 2)
#' mStopRatioMean(x, y)
#' fastPerm(x, y)
#'
#' x <- rnorm(110, 0, 1)
#' y <- rnorm(110, 1, 1)
#' mStopDiffMean(x, y)
#' fastPerm(x, y, testStat = diffMean)

  if (attributes(testStat)$comparison == "ratio" & (min(x,y)<0)) {
    stop("Some sample values <=0. With ratio statistics, all values must be greater than 0.")
  }
  
  nx <- length(x)
  ny <- length(y)
  
  # need nx <= ny. swap input if not correctly ordered
  if (ny<nx){
    xtemp <- x
    x <- y
    y <- xtemp
    
    nxtemp <- nx
    nx <- ny
    ny <- nxtemp
  }
  
  pmf <- PiLgamma(nx, ny)
  
  mMax <- as.numeric(names(pmf)[which(pmf == max(pmf))])
  t0 <- testStat(x, y)
  
  countTemp <-  rep(0, nx)
  names(countTemp) <- 1:nx
  
  m <- 1
  sumGTzero <- TRUE
  
  while (sumGTzero & (m <= mMax[1])) {

    tb <- rep(NA, B)
    
    for (b in 1:B){
      starX <- sample(1:nx, size = m, replace = FALSE)
      starY <- sample(1:ny, size = m, replace = FALSE)

      xNew <- c(x[-starX], y[starY])
      yNew <- c(y[-starY], x[starX])
      
      tb[b] <-  testStat(xNew, yNew)
    }
    
    countTemp[m] <- sum(tb >= t0)
    
    sumGTzero <- (countTemp[m] > 0)

    m <- m + 1

  }

  mStop <- m-1
  # only continue if at least 2 partitions estimated
  if (mStop > 1) {
    mLast <- max(which(countTemp>0))
      
    # setup partitions for prediction, to be symmetric about mMax
    if (length(mMax)==1 & mMax[1]==nx){
      mPred <- 1:mMax
    } else if (length(mMax)==1 & mMax[1]<nx){
      mPred <- c(1:mMax,((mMax-1):0)[1:(nx-mMax)])
    } else if (length(mMax)==2 & mMax[2]==nx){
      mPred <- c(1:mMax[1], mMax[1])
    } else {
      mPred <- c(1:mMax[1], (mMax[1]:0)[1:(nx - mMax[1])])
    }
    
    # data frame for regression
    count <- c(B, countTemp[1:mLast]) + 1*adjusted
    countForReg <- data.frame(count = count, mReg = 0:mLast)
    
    poisFit <- glm(count ~ mReg, family = poisson, data = countForReg)
    
    # new data for predictions in partitions m = mPred
    mNewData <- data.frame(mReg = mPred)
    
    # if nx < ny, set p-value in 0 partition to 1 and 
    # estimate p-value in partition nx
    if (nx < ny) { 
        
      # note: predictions with type="response" are not reliable for small values. 
      # There is not a problem for type="link"
      pPoisCount <- c(B + 1*adjusted,
        exp(predict(poisFit, newdata=mNewData, type="link")))

      pPred <- pPoisCount %*% pmf / (B + 1*adjusted)
    
    # if nx==ny, set both the 0 and nx partition to 1
    } else { 
    
      mNewData <-data.frame(mReg = mNewData$mReg[-nx])
      
      pPoisCount <- c(B + 1*adjusted,
        exp(predict(poisFit, newdata = mNewData, type="link")),
        B + 1*adjusted)
      
      pPred <- pPoisCount %*% pmf / (B + 1*adjusted)

    }
    
    glmSummary <- summary(poisFit)

    ret <- list(pPred = pPred,
      mStop = mStop,
      deviance = glmSummary$deviance,
      aic = glmSummary$aic,
      df.residual = glmSummary$df.residual,
      B = B,
      t0 = t0,
      comparison = attributes(testStat)$comparison,
      summary = attributes(testStat)$summary)
  } else {

    warning(paste("mStop = ", mStop, ": not enough partitions estimated (p-value too small)\nTry increasing B", sep = ""))

    ret <- list(pPred = NA,
      mStop = mStop,
      deviance = NA,
      aic = NA,
      df.residual = NA,
      B = B,
      t0 = t0,
      comparison = attributes(testStat)$comparison,
      summary = attributes(testStat)$summary)
  }
  class(ret) <- "fastPerm"
  
  return(ret)
}

print.fastPerm <- function(fp){
  #' Print function for fastPerm
  #'
  #' This function prints the results of fastPerm
  #' @param fp Output from the fastPerm function
  #' @keywords fastPerm print
  #' @export
  #' @examples
  #' x <- rexp(100, 5)
  #' y <- rexp(100, 2)
  #' print(fastPerm(x, y, testStat = ratioMean))
  
  result <- paste("    fastPerm Two Sample Test   \n\n",
    fp$comparison, " of ", fp$summary, "s\n", 
    prettyNum(fp$B, big.mark = ","), " iterations within partitions",
    "\n\nobserved statistic = ", signif(fp$t0,3),
    "\np-value = ", signif(fp$pPred,3),
    "\nmStop = ", fp$mStop, ", deviance = ", signif(fp$deviance,3), ", AIC = ",
    signif(fp$aic,3), sep = "")
  
  writeLines(result)
}
bdsegal/fastPerm documentation built on July 22, 2019, 1:25 p.m.