R/makeVectorsFeatureSet.R
In frmaTools: Frozen RMA Tools

Documented in makeVectorsFeatureSet

getProbeMean <- function(x, nb){
  x <- matrix(x, ncol=nb, byrow=TRUE)
  rowMeans(x)
}

getProbeVar <- function(x, nb){
  x <- matrix(x, ncol=nb, byrow=TRUE)
  apply(x, 1, var)
}

getPsetMAD <- function(x, nc, batch.id){
  x <- matrix(x, ncol=nc)
  x.tmp <- split(t(x), batch.id)
  median(unlist(lapply(x.tmp, mad)))
}

rwaFit2 <- function(x1, x2, x3, x4){
  ncols <- ncol(x1)
  w.tmp <- x2/max(x2)
  w.tmp <- matrix(rep(w.tmp, ncols), ncol=ncols)
  pe.tmp <- x3
  pe.tmp[1] <- pe.tmp[1]-sum(pe.tmp)
  rcModelWPLM(y=x1, w=w.tmp, row.effects=pe.tmp, input.scale=x4)
}

#####

makeVectorsFeatureSet <- function(files, batch.id, pkgname, background="rma", normalize="quantile", normVec=NULL, file.dir=".", verbose=TRUE){
  require(oligo)
  
  wd <- getwd()
  setwd(file.dir)
  object <- read.celfiles(filenames=files, pkgname=pkgname, verbose=verbose)
  setwd(wd)
  
  if(verbose) message("Data loaded \n")
  
  batch.size <- table(batch.id)[1]
  if(!all(table(batch.id)==batch.size)) stop("Batches must be of the same size.")

  if(background=="rma"){
    object <- backgroundCorrect(object, verbose=FALSE)
    if(verbose) message("Background Corrected \n")
    gc()
  }

  featureInfo <- getFidProbeset(object)

##  if(target=="core"){
##    featureInfo <- getFidMetaProbesetCore(object)
##  }

  pmi <- featureInfo[["fid"]]
  pns <- as.character(featureInfo[["fsetid"]])
  pms <- exprs(object)[pmi,, drop=FALSE]
  rm(object)
  gc()

  if(normalize=="quantile"){
    if(is.null(normVec)) normVec <- normalize.quantiles.determine.target(pms)
    pms <- normalize.quantiles.use.target(pms, normVec)
    if(verbose) message("Normalized \n")
  }

  pms <- log2(pms)
  gc()
  
  N <- 1:dim(pms)[1]
  S <- split(N, pns)
  nc <- ncol(pms)
  nr <- nrow(pms)
  resids <- matrix(ncol=nc, nrow=nr)
  probeVec <- vector(length=nr)
  if(verbose) message("Beginning Probe Effect Calculation ... \n")
  for(k in 1:length(S)){
    fit <- rcModelPLM(pms[S[[k]],, drop=FALSE])
    resids[S[[k]],] <- fit$Residuals
    probeVec[S[[k]]] <- fit$Estimates[(nc+1):length(fit$Estimates)]
    if((k%%1000)==0){
      message(paste("Finished probeset:",k,"\n"))
      gc()
    }
  }
  if(verbose) message("Probe Effects Calculated \n")
  gc()
  
  tmp <- split(t(resids), batch.id)
  withinMean <- lapply(tmp, getProbeMean, batch.size)
  withinVar <- lapply(tmp, getProbeVar, batch.size)
  withinAvgVar <- rowMeans(matrix(unlist(withinVar), ncol=length(withinVar)))
  btwVar <- apply(matrix(unlist(withinMean), ncol=length(withinMean)), 1, var)
  rm(tmp)
  rm(withinMean)
  rm(withinVar)
  if(verbose) message("Probe Variances Calculated \n")
  gc()
  
  tmp <- split(resids, pns)
  psetMAD <- unlist(lapply(tmp, getPsetMAD, nc, batch.id))
  rm(tmp)
  rm(resids)
  if(verbose) message("Probe Set SDs Calculated \n")
  gc()

  w <- 1/(withinAvgVar + btwVar)
  w[w==Inf] <- 1
  medianSE <- vector(length=length(psetMAD))
  if(verbose) message("Beginning Median SE Calculation ... \n")
  for(k in 1:length(S)){
    fit <- rwaFit2(pms[S[[k]],, drop=FALSE], w[S[[k]]], probeVec[S[[k]]], psetMAD[k])
    medianSE[k] <- median(fit$StdErrors)
    if((k%%1000)==0){
      message(paste("Finished probeset:",k,"\n"))
      gc()
    }
  }
  if(verbose) message("Median SEs Calculated \n")
  gc()

  names(psetMAD) <- names(medianSE) <- unique(pns)
  
  rm(w)
  rm(pms)
  rm(pns)
  gc()

  return(list(normVec=normVec, probeVec=probeVec, probeVarWithin=withinAvgVar, probeVarBetween=btwVar, probesetSD=psetMAD, medianSE=medianSE))
}