R/integration.R

#Global variables
eraseRandPairInteRepeat <-TRUE  # random pairwise integration = allowing repeat.



#' Runs Integration step and calculates the p-value
#'
#' @param rdaSnpAnn1 the transformed zscores rda file name
#' generated by the randomisation step-MAE for the
#' SNPannotation1
#' - *obsRandomValsMean_zscoreNmlDistr.rda
#' @param rdaSnpAnn2 the transformed zscores rda file name
#' generated by the randomisation step-MAE for the
#' SNPannotation2
#' - *obsRandomValsMean_zscoreNmlDistr.rda
#' @param outFilePrefix The names of all the output files
#' generated will be assigned this prefix
#' @param alphaVal indicates the relative weight of
#' SNPannotaion1. Numeric data type with values
#' from 0 to 1. default = 0.5
#' @param nIterations number of random selction iterations
#' default =10000
#' @param seedValue value of the seed to be set.
#' default = NULL = No seed set
#'
#' @return the pnorm p-value for the alpha in parameter
#' alphaVal
#' @export
#'
#' @examples
integrationPvalCalc <- function(rdaSnpAnn1, rdaSnpAnn2, outFilePrefix, alphaVal=0.5, nIterations=10000, seedValue=NULL){
#rdaSnpAnn1 <- rdaFiles[1]; rdaSnpAnn2 <- rdaFiles[2]; alphaVal <- 0.5; nIterations <- 10000
  #outFilePrefix <- "test"; seedValue <- NULL
  cat("\n Integration & p-value calculation step -" )
  date()

  cat("\n Integration for alpha=", alphaVal, "start..\n")
  intOutRda <- runIntegration4alpha ( rdaSnpAnn1,rdaSnpAnn2, alphaVal, nIterations, outFilePrefix, seedValue)
    #returns the output rda file name
  cat("\n Integration for alpha=", alphaVal, "complete")
  date()

  #calculate the pval
  cat("\n Calculating p-value for: ", alphaVal, "\n")

  val <- getPval4Alpha (intOutRda)
  cat("\n Pval :", val ,"\n")

  date()
  cat("\n Integration & p-value calculation step end \n" )
  return(val)
}

runIntegration4alpha <- function ( ref1.rda,ref2.rda, alpha, nIterations, outPrefix, seedVal) {
  #Run integrate() to integrate the randomisation done with ref1 and ref2
  # for an alpha value

  cat( "\n\nIntegrating ")
  cat("\n ", ref1.rda , "\n ", ref2.rda )
  cat("\n nIterations = ", nIterations, "\n alpha = ", alpha , "\n")

  #<tiss>_ASEsNonASEs_<ref1><ref2>_Randomisation_nIter<nIterations>_alpha<alpha>
  fname.prefix <- paste0(outPrefix, "_alpha", alpha )

  # Ref1 randomisation values
  load(ref1.rda)
  obs.r1 <- obs.zscore
  ran.r1 <- ran.zscores

  rm(obs.zscore, ran.zscores)

  # Ref2 randomisation values
  load(ref2.rda)
  obs.r2 <- obs.zscore
  ran.r2 <- ran.zscores

  rm(obs.zscore, ran.zscores)

  #Running integration
  fname <- fname.prefix
  #cat ("\n Integration start : ", date(), "\n")
  retval <- integrate(obs.r1, ran.r1, obs.r2, ran.r2, alpha, nIterations, fname, seedVal)
  #cat ("\n Integration end :",  date(), "\n")

  rm( obs.r1, ran.r1, obs.r2, ran.r2, alpha, nIterations, fname)
  return(retval)
  ##End running integration
}


#Integrate
#obs.ref1<-obs.r1.mean; ran.ref1 <-ran.r1.mean ;obs.ref2<- obs.r2.mean; ran.ref2 <- ran.r2.mean; fname.prefix <-fname
integrate <- function(obs.ref1, ran.ref1, obs.ref2, ran.ref2, alpha, nIterations, fname.prefix, seedval=NULL) {
  # For a  alpha, integrate obs values (obs.ref1, obs.ref2) for the 2 ref datasets and random values for the same - save the integrated vals

  #alpha = measure of confidence in the ref1
  #(1-alpha) = measure of confidence in the ref2

  #integrated.obs = (obs.ref1 * alpha )+ (obs.ref2 (1-alpha) )

  #nIterations = no. of iterations for the pairwise integration of the random values ( ran.ref1, ran.ref2)
  #For the nIter, randomly select an element from ran.ref1 and ran.ref2,
  #integrated.ran =   (ran.ref1 * alpha )+ (ran.ref2 (1-alpha) )

  #integrated values and alpha saved
  #in file : <fname.prefix>_integratedVals.rda

  # return the rda file name the integrated values are saved - integrated.obs, integrated.random, alpha

  if (!is.null(seedval)){
    set.seed(seedval)
    cat("\n Seed set=", seedval, "\n")
  } else {
    cat("\n No seed set \n")
  }


  int.obs <- (obs.ref1 * alpha )+ (obs.ref2  *(1-alpha) )

  nIter <- 1:nIterations
  nElts2Sel <- 1  # nElements selected is 1 for each iteration
  int.ran <-  lapply( nIter, function( y ) {
    r.r1 <-  sample(x=ran.ref1 , size=nElts2Sel, replace=eraseRandPairInteRepeat )
    r.r2 <-  sample(x=ran.ref2 , size=nElts2Sel, replace=eraseRandPairInteRepeat )
    i.ran =   (r.r1 * alpha )+ (r.r2 * (1-alpha) )
    return(i.ran)
  } )
  int.ran <-  unlist( int.ran)

  fname <- paste0(fname.prefix, "_integratedVals.rda" )
  save(int.obs, int.ran , alpha,  file=fname, compress=TRUE)

  cat ("\nOutput dir : ", getwd() , "\nOutput file : ", fname , "\n")

  #Saving the distribution of the integrated obs and ran
  p.fname <- paste0(fname.prefix, "_intValDistr.pdf" )
  title <- gsub(".pdf" , "", p.fname)
  title <- paste0( title, "\nDistribution of integrated observed and random values \nObserved value = ", int.obs )

  plotDistributionVals (int.obs, int.ran, p.fname,  title)

  cat ("\nIntegrated values distribution plot saved to : ", p.fname , "\n")

  return( fname )
}

getPval4Alpha <- function(integratedRda) {
  #function returns the pval calculated

  load(integratedRda)
  vals <- calculateRanObsZscoresPnorm ( int.obs, int.ran ) # returns the (obs.zscore, ran.zscores, pnorm.val )
  pnorm.pval <- unlist ( vals[PNORM] )
  return(  pnorm.pval )
}
karishdsa/ERASE documentation built on May 9, 2019, 2:55 p.m.