R/fabatchaddon.R
In bapred: Batch Effect Removal and Addon Normalization (in Phenotype Prediction using Gene Data)

Documented in fabatchaddon

fabatchaddon <-
  function(params, x, batch) {
    
    if(any(is.na(x)))
      stop("Data contains missing values.")
    if(!is.factor(batch))
      stop("'batch' has to be of class 'factor'.")  
    if(!is.matrix(x))
      stop("'x' has to be of class 'matrix'.") 
    if(max(table(batch)) >= ncol(x))
      stop("The sample size within each batch has to be smaller than the number of variables.")
    
    if(!inherits(params, "fabatch"))
      stop("Input parameter 'params' has to be of class 'fabatch'.")
    
    if(ncol(params$xadj) != ncol(x))
      stop("Number of variables in test data matrix different to that of training data matrix.")	 
    
    batches = levels(batch)
    nbatches = length(batches)
    nvars <- ncol(x)
    
    sdb = as.list(rep(0,nbatches))
    for (i in 1:nbatches) {
      sdb[[i]] = apply(x[batch==batches[i],],2,sd)
    }
    sdb0=sdb
    
    badvariableslisthere <- lapply(sdb, function(x) which(x==0))
    badvariableshere <- sort(unique(unlist(badvariableslisthere)))
    goodvariableshere <- setdiff(1:nvars, badvariableshere)
    
    if(nbatches > 1) {
      # Remove batch specific means:
      adjustmentmod = lm(x~batch)
      design = model.matrix(~batch)
      adjustmentcoef = coef(adjustmentmod)
      xb = x-design%*%adjustmentcoef
      adjustmentcoef0 = adjustmentcoef
    }
    else
      xb = scale(x, scale=FALSE)
    
    pooledsds <- apply(xb, 2, sd)   
    
    # 'scaledxb' is X centered and scaled per batch:    
    scaledxb = xb
    for (i in 1:nbatches) {
      scaledxb[batch==batches[i],setdiff(1:nvars, badvariableslisthere[[i]])] = 
        scale(xb[batch==batches[i],setdiff(1:nvars, badvariableslisthere[[i]])],center=rep(0,nvars-length(badvariableslisthere[[i]])),scale=sdb[[i]][setdiff(1:nvars, badvariableslisthere[[i]])])
    }   
    
    # Predict probabilities using the model estimated on the training data:
    p1x = as.vector(1/(1+exp(-params$b0-as.matrix(scaledxb)[,setdiff(1:nvars, params$badvariables)]%*%params$b))) # mean(ifelse(p1x>0.5,2,1)!=ytest)
    
    
    herealsogood <- lapply(badvariableslisthere, 
                           function(x) which(setdiff(1:nvars, params$badvariables) %in% setdiff(1:nvars, x)))
    goodinboth <- lapply(badvariableslisthere,
                         function(x) intersect(setdiff(1:nvars, params$badvariables), setdiff(1:nvars, x)))
    
    
    scaledxbfa <- scaledxb
    
    for (i in 1:nbatches) {
      
      # Determine number of factors if not given:    
      if (is.null(params$nbfinput)){
        ##require("mnormt")
        nbf = nbfactors(scale(sweep(scale(scaledxb[batch==batches[i],goodinboth[[i]]],center=params$m1[herealsogood[[i]]],scale=FALSE), 1, 1-p1x[batch==batches[i]], "*") + 
                                sweep(scale(scaledxb[batch==batches[i],goodinboth[[i]]],center=params$m2[herealsogood[[i]]],scale=FALSE), 1, p1x[batch==batches[i]], "*")), maxnbfactors=min(c(floor(sum(batch==batches[i])/2), 12)), minerr=params$minerr)$optimalnbfactors
      }
      else
        nbf <- params$nbfinput  
      
      if(is.na(nbf)) {
        warning("There occured an issue in the factor estimation. Number of factors set to zero.")
        nbf <- 0
      }   
      
      if(nbf > 0) {
        # Calculate the factors on 'cdta' (batch-centered-scaled and class-removed):
        fa = emfahighdim(sweep(scale(scaledxb[batch==batches[i],goodinboth[[i]]],center=params$m1[herealsogood[[i]]],scale=FALSE), 1, 1-p1x[batch==batches[i]], "*") + 
                           sweep(scale(scaledxb[batch==batches[i],goodinboth[[i]]],center=params$m2[herealsogood[[i]]],scale=FALSE), 1, p1x[batch==batches[i]], "*"),nbf=nbf,minerr=params$minerr)
        
        # Remove the factor influences:
        scaledxbfa[batch==batches[i],goodinboth[[i]]] = scaledxb[batch==batches[i],goodinboth[[i]]] - fa$Factors%*%t(fa$B)
      }
      else {
        scaledxbfa[batch==batches[i],goodinboth[[i]]] <- scaledxb[batch==batches[i],goodinboth[[i]]]
        fa <- NULL
      }
      
    } 
    
    means2batch <- sd2batch <- matrix(nrow=length(levels(batch)), ncol=ncol(scaledxbfa))
    
    # scale again:    
    for (i in 1:nbatches) {
      means2batch[i,] <- colMeans(scaledxbfa[batch==batches[i],])
      sd2batch[i,] <- apply(scaledxbfa[batch==batches[i],], 2, sd)
      sd2batch[i,][sd2batch[i,]==0] <- 1
      scaledxbfa[batch==batches[i],] = scale(scaledxbfa[batch==batches[i],], center=means2batch[i,], scale=sd2batch[i,])
    }
    
    xadj <- sweep(sweep(scaledxbfa, 2, params$pooledsds, "*"), 2, params$meanoverall, "+")
    
    if(length(params$badvariables) > 0) {
      for(i in 1:nbatches) {
        if(length(setdiff(params$badvariables, badvariableslisthere[[i]])) > 0) {
          heregood <- setdiff(params$badvariables, badvariableslisthere[[i]])
          meanssave <- colMeans(xadj[batch==batches[i],heregood, drop=FALSE])
          xadj[batch==batches[i],heregood] <- scale(xadj[batch==batches[i],heregood, drop=FALSE], center=TRUE, scale=TRUE)
          xadj[batch==batches[i],heregood] <- sweep(sweep(xadj[batch==batches[i],heregood, drop=FALSE], 2, pooledsds[heregood], "*"), 2, meanssave, "+")  		 
        }
      }	 
    }
    
    return(xadj)
    
  }

Any scripts or data that you put into this service are public.

bapred documentation built on June 22, 2022, 9:08 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

bapred
Batch Effect Removal and Addon Normalization (in Phenotype Prediction using Gene Data)

R/fabatchaddon.R
In bapred: Batch Effect Removal and Addon Normalization (in Phenotype Prediction using Gene Data)

Defines functions fabatchaddon

Documented in fabatchaddon

Try the bapred package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

bapred Batch Effect Removal and Addon Normalization (in Phenotype Prediction using Gene Data)

R/fabatchaddon.R In bapred: Batch Effect Removal and Addon Normalization (in Phenotype Prediction using Gene Data)

Defines functions fabatchaddon

Documented in fabatchaddon

Try the bapred package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

bapred
Batch Effect Removal and Addon Normalization (in Phenotype Prediction using Gene Data)

R/fabatchaddon.R
In bapred: Batch Effect Removal and Addon Normalization (in Phenotype Prediction using Gene Data)