R/prediction_wbcc.R

##' Predict cell type composition of a whole blood sample based on a given trained model.
##'
##' Predict cell percentages
##' @title prediction_wbcc
##' @param predictor trained model
##' @param data matrix of beta values 
##' @param covariates matrix of covariates 
##' @param transformation depends on the model (e.g. (10^x)-1 if model is log10(cellPercentages+1)). Default is no transformation.
##' @param ncomp optimal number of components
##' @param impute TRUE
##' @param ... extra options for predict()
##' @return predicted cell percentages
##' @author miterson
##' @export
##' @importFrom stats predict coefficients formula lm median
prediction_wbcc <- function(predictor, data, covariates, transformation=function(x) x, ncomp=NULL, impute=TRUE, ...) {
  
  if(is.null(data) | is.null(covariates))               # Check if data and covariates provided
    stop("Both data and covariates must be provided.")
  if(ncol(data) != nrow(covariates))                    # Check dimensions
    stop("Data column number must match covariate row number.")
  if(!all.equal(colnames(data), rownames(covariates)))  # Check same names
    stop("Data column names must match covariate row names.")
  
  matchID <- match(rownames(covariates), colnames(data))
  if(any(is.na(matchID)))
    stop("Data column names must match covariate row names.")
  covariates <- covariates[matchID, ]
  
  if(class(predictor) == "mvr")
    names <- dimnames(coefficients(predictor))[[1]]
  else if(class(predictor) == "matrix")
    names <- rownames(predictor)
  else
    stop(paste("This function is not designed for a", class(predictor), "class of predictor."))
  
  covaNames <- gsub("covariates", "", grep("covariates", names, value=TRUE))
  dataNames <- gsub("data", "", grep("data", names, value=TRUE))

  matchID <- match(covaNames, colnames(covariates))
  if(any(is.na(matchID)))
    stop("Covariates in the same do not match those in the predictor.")
  covariates <- covariates[ , matchID]
  
  if(any(is.na(covariates)) & !impute) {
    stop("Missing values are not allowed in the covariates if imputation is not specified.")
  } 
  else if(any(is.na(covariates)) & impute) {
    print(paste("There are", sum(is.na(covariates)), "NA's in the covariate matrix.",
                "These will be median imputed."))
    covariates <- apply(covariates, 2, function(x) {
      x[is.na(x)] = median(x, na.rm=TRUE)
      x})
  }
  
  matchID <- match(dataNames, rownames(data))
  if(any(is.na(matchID)))
    warning("Row names of the sample do not match those of the predictor.")
  data <- data[matchID, ]
  if(any(is.na(data)) & !impute) {
    stop("Missing values are not allowed in the data if imputation is not specified")
  } 
  else if(any(is.na(data)) & impute) {
    print(paste("There are", sum(is.na(data)), "NA's in the data matrix.",
                "These will be median imputed."))
    nas <- apply(data, 1, function(x) any(is.na(x)))
    data[nas,] <- apply(data[nas, ], 1, function(x) median(x, na.rm=TRUE)) 
    data[is.na(data)] <- median(data, na.rm=TRUE) 
  }
  
  # Prediction
  if(class(predictor) == "mvr") {
    predicted <- predict(predictor, newdata = list(covariates=covariates, data=t(data)), ncomp=ncomp, ...)
    predicted <- predicted[ , , 1]
  }
  else if(class(predictor) == "matrix") {
    predicted <-  cbind(1, covariates, t(data)) %*% predictor
  }
  
  invisible(transformation(predicted))
}
ljsinke/wbccPredictor documentation built on May 15, 2019, 12:07 a.m.