R/01-bankStat.R

# BANKED

roam <- function(M) {
  as.vector(matrixMean(M))
}

stand <- function(M, mu) {
  sqrt(as.vector(matrixVar(M, mu)))
}

### INTERNAL
### Get the background distribution of "unusual" values in the bank
### KRC: why are we going to all the extra trouble of recomputing the loess
###      by removing each individual sample. Does that even matter?
nuValsIterBank <- function(matrix) {
  n <- nrow(matrix)
  s <- ncol(matrix)
  temp.means <- temp.sd <- matsdEst <- matrix(NA, n, s)

  ## Leave one sample out at a time, recompute the row-wise mean and SD,
  ## then smooth the SD as a function of the mean with loess.
  for (j in 1:s){
    temp.means[ ,j] <- roam(matrix[ ,-j])
    temp.sd[ ,j] <- stand(matrix[ ,-j], temp.means[ ,j])
    l.mod <- loess(temp.sd[ ,j] ~ temp.means[ ,j])
    matsdEst[ ,j] <- predict(l.mod)
  }
  ## Standardize each value against its own leave-one-out estimates
  matNu <- (matrix - temp.means)/matsdEst
  return(matNu)
}
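
## A minimal, commented-out sanity check (my own illustration with simulated
## data, not package code): assuming the package and its matrixMean/matrixVar
## helpers are loaded, the leave-one-out nu values on a simulated log-scale
## bank should be roughly standard normal, which is what pValuesBanked assumes.
# set.seed(1)
# fakeBank <- matrix(rnorm(1000 * 10, mean = 7, sd = 1), nrow = 1000, ncol = 10)
# nu <- nuValsIterBank(fakeBank)
# c(mean(nu), sd(nu))   # expect values near 0 and 1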

### EXTERNAL
### prepare a "bank" of normal samples against which to test an individual
createBank <- function(bankMatrix) {
  ## warn if data appears to be on the raw scale
  if (diff(range(bankMatrix)) > 100) {
    warning("createBank: Data appears to be on the raw scale and not on a log scale.")
  }
  ## Compute row-wise mean and standard deviation
  bankMeans  <- roam(bankMatrix)
  bankSD <- stand(bankMatrix, bankMeans)
  ## Fit a loess model, since we assume SD is a smooth function of the mean
  loess.model <- loess(bankSD ~ bankMeans)
  sdEst <- predict(loess.model)
  ## bundle the results
  rowStats <- data.frame(mean=bankMeans, SD=bankSD, sdEst=sdEst)
  ##
  background <- nuValsIterBank(bankMatrix)
  ## return a list
  list(rowStats = rowStats, background = background)
}
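
## Commented-out usage sketch with simulated data ('fakeBank' is an invented
## example matrix, not part of the package):
# set.seed(2)
# fakeBank <- matrix(rnorm(1000 * 12, mean = 7, sd = 1), nrow = 1000, ncol = 12)
# bank <- createBank(fakeBank)
# str(bank$rowStats)     # per-row mean, SD, and loess-smoothed SD estimate
# dim(bank$background)   # background nu values, same dimensions as fakeBank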

### EXTERNAL
### Primary interface; compute the actual Newman banked statistic
bankStat <- function(bankObj, testSet, bankMatrix){
  n <- dim(testSet)[1]
  s <- dim(testSet)[2]

  if(missing(bankObj)){
    bankObj <- createBank(bankMatrix)
  } else if (!missing(bankMatrix)) {
    stop("You should only supply one of 'bankObj' or 'bankMatrix'.")
  }
  temp.means <- bankObj$rowStats$mean
  sdEst <- bankObj$rowStats$sdEst
  Back_dist_iter <- bankObj$background

  matNuBank <- nuValsBank(matrix = testSet, mean = temp.means, sd = sdEst)

  matPBanked <- pValuesBanked(matrix = matNuBank, Background = Back_dist_iter, n, s)

  return(list(nu.statistics = matNuBank, p.values = matPBanked))

}
### If 'bankObj' is not supplied, leave that argument empty and pass the raw bank:
###       pvals_uncorr <- bankStat(, testSet, bankMatrix)
### or, equivalently, use named arguments: bankStat(testSet = testSet, bankMatrix = bankMatrix)
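
## Commented-out sketch of both calling patterns, reusing 'bank' and 'fakeBank'
## from the createBank example above ('testSet' is likewise invented data):
# testSet <- matrix(rnorm(1000 * 3, mean = 7, sd = 1), nrow = 1000, ncol = 3)
# res1 <- bankStat(bankObj = bank, testSet = testSet)          # precomputed bank
# res2 <- bankStat(testSet = testSet, bankMatrix = fakeBank)   # bank built on the fly
# dim(res1$nu.statistics); dim(res1$p.values)                  # both 1000 x 3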

## Center each row of the test set by the bank mean and scale by the smoothed bank SD
nuValsBank <- function(matrix, mean, sd){
  centered <- sweep(matrix, 1, mean, "-")
  scaled <- sweep(centered, 1, sd, '/')
  return(scaled)
}

## This double loop looks painfully slow. Can we speed it up?
pValuesBankedOrig <- function(matrix, Background, n, s){
  meanDist = mean(Background)
  sdDist = sd(Background)

  matP = matrix(NA,n,s)
  for (i in 1:n){
    for (j in 1:s){
      matP[i,j] <- pnorm(matrix[i,j],meanDist,sdDist)
    }
  }
  return(matP)
}
## Vectorized version of the loop above; same background-based normal model.
pValuesBankedSecond <- function(matrix, Background, n, s){
  meanDist = mean(Background)
  sdDist = sd(Background)
  pnorm(matrix, meanDist, sdDist)
}
## Current default used by bankStat: treat the nu statistics as standard normal.
pValuesBanked <- function(matrix, Background, n, s){
  pnorm(matrix, 0, 1)
}
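
## Commented-out check (my own, with made-up inputs) that the vectorized
## version reproduces the original double loop; note that pValuesBanked
## instead assumes the nu statistics are already standard normal.
# set.seed(3)
# bg <- matrix(rnorm(200), 20, 10)
# nu <- matrix(rnorm(50), 10, 5)
# all.equal(pValuesBankedOrig(nu, bg, 10, 5),
#           pValuesBankedSecond(nu, bg, 10, 5))   # expect TRUE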

## To FDR-correct the output: if 'pvals_uncorr' is the list returned by bankStat,
## the following yields a matrix of Benjamini-Hochberg adjusted p values.

# v <- as.vector(pvals_uncorr$p.values)
#
# BH_vec <- p.adjust(v, method = 'fdr')
# fdr <- as.matrix(BH_vec)
# dim(fdr) <- c(n, s)


### The following four functions were written in an effort to compute p values and
### their FDR correction directly. They work on smaller data sets, but the built-in
### FDR machinery (p.adjust) runs significantly faster, so it remains the default. ###
## Empirical p value: fraction of unique background values at or below each statistic
altPValues <- function(matrix,Background,n,s){
  matP <- matrix(NA,n,s)
  v = as.vector(Background)
  vec = v[!duplicated(v)]

  for (i in 1:n){
    for (j in 1:s){
      matP[i,j] <- length(vec[vec <= matrix[i,j]])/length(vec)
    }
  }
  return(matP)
}

## Empirical p values from a kernel density estimate of the background:
## the estimated density is integrated from its minimum up to each statistic.
empiricalP <- function(matrix,background,n,s){
  pdf = approxfun(density(background))
  min = min(background)

  pf = function(q){integrate(pdf,min,q,subdivisions = 2000)$value}


  empirMatP = matrix(NA,n,s)

  for (i in 1:n){
    for (j in 1:s){
      if (matrix[i,j] >= min){empirMatP[i,j] = pf(matrix[i,j])}
      else {empirMatP[i,j] = 0.04}   # fixed placeholder for statistics below the background minimum
    }
  }
  return(empirMatP)
}
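
## Commented-out rough check (my own illustration): integrating the kernel
## density estimate should track the empirical CDF of the background.
# set.seed(4)
# bg <- rnorm(500)
# nu <- matrix(runif(20, -1, 1), 4, 5)
# p.kde  <- empiricalP(nu, bg, 4, 5)
# p.ecdf <- matrix(ecdf(bg)(nu), 4, 5)
# summary(abs(as.vector(p.kde - p.ecdf)))   # differences should be small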

fdrCorrection <- function(matrix, n, s, verbose = FALSE){

  ## Benjamini-Hochberg method employed. Could also use the built-in p.adjust(p, "BH").
  ## The FDR level itself only enters at the analysis stage, because
  ## (rank/total)*alpha merely sets the adjusted cutoff.


  v <- as.vector(matrix)
  vUnique <- v[!duplicated(v)]
  vs <- sort(vUnique,decreasing = T)
  vRank <- rank(vs)
  num <- max(vRank)

  adjustedP <- numeric(num)    # adjusted p value for each entry of the sorted vector vs
  adjustedP[1] <- vs[1]        # the largest p value is its own adjusted value

  ##### if statement

  # breakPt = floor(0.9*num)
  # for (i in 1:breakPt){adjustedP[i] = 0.499}
  #
  # for (i in breakPt+1:num){
  #   adjustedP[i] <- min(adjustedP[i-1],(vs[i]*num)/vRank[i])
  #   if(i%%((num-breakPt)/10) == 0)print((i/(num-breakPt))*100)
  # }
  #

  ## Running minimum of (p * total)/rank, stepping down from the largest p value
  for (i in seq_len(num)[-1]){
    adjustedP[i] <- min(adjustedP[i-1],(vs[i]*num)/vRank[i])
    if(verbose && i %% ceiling(num/10) == 0) print((i/num)*100)
  }

  ## Map each original p value back to its adjusted value; adjustedP is indexed
  ## by position in the sorted vector vs, so the lookup must match against vs.
  fdrP <- matrix(NA,n,s)
  for (i in 1:n){
    for (j in 1:s){
      fdrP[i,j] <- adjustedP[match(matrix[i,j],vs)]
    }
  }
  return(fdrP)
}
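
## Commented-out sanity check (my own): with no tied p values, the adjusted
## values should agree with p.adjust(..., method = "BH"); with ties the two
## can differ because fdrCorrection works on the de-duplicated values.
# set.seed(5)
# pm <- matrix(runif(40), 8, 5)
# all.equal(fdrCorrection(pm, 8, 5),
#           matrix(p.adjust(as.vector(pm), method = "BH"), 8, 5))   # expect TRUE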

fdrCutoff <- function(matrix,alpha){

  v <- as.vector(matrix)
  w <- sort(v)                                   # p values in increasing order
  threshold <- alpha*(1:length(v))/length(v)     # Benjamini-Hochberg step-up thresholds
  c <- cbind(w,threshold,w<=threshold)           # compare the *sorted* p values to the thresholds

  sig_value <- c[,1][c[,3] == 1]
  cutoff <- max(sig_value)    # largest p value that clears its threshold
                              # (returns -Inf with a warning if nothing is significant)

  return(cutoff)


  ## If we want to input a cutoff then use this method, else use FDR correction on p-values
  # sigP <- fdrCutoff(matPBanked,alpha = 0.05)
  # binarySigBank <- matrix(0,n,s)
  #
  # for (i in 1:n){
  #   for (j in 1:s){
  #     if (matPBanked[i,j] <= sigP){binarySigBank[i,j] = 1}
  #   }
  #   if(i%%(n/10) == 0)print((i/n)*100)
  # }
}
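
## Commented-out usage sketch: a vectorized version of the loop in the comment
## above; 'matPBanked' is assumed to be the p-value matrix returned by bankStat.
# sigP <- fdrCutoff(matPBanked, alpha = 0.05)
# binarySigBank <- (matPBanked <= sigP) + 0   # 1 = significant, 0 = not
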
#####################
