R/train_wbcc.R

##' Creates a predictor for the cell type composition of whole blood samples from methylation information.
##'
##' @title Train a cell count predictor
##' @param data matrix of beta values
##' @param covariates matrix of covariates
##' @param cellPercentages matrix of cell percentages 
##' @param model specification (e.g.log10(cellPercentages+1) ~ data + covariates)
##' @param ncomp number of PLS components to be tested
##' @param keep.model logical. Default FALSE means do not return full PLS model.
##' @param ... additional parameters for plsr()
##' @return prediction model plsr object
##' @author miterson
##' @export
##' @importFrom pls plsr

train_wbcc <- function(data, covariates, cellPercentages, model=formula(cellPercentages~covariates+data),
                  ncomp = 50, keep.model = FALSE, ...){

  if(is.null(data) | is.null(covariates) | is.null(cellPercentages))
    stop("Data, covariates, and cell percentages must be provided.")
  if(ncol(data) != nrow(covariates) | nrow(covariates) != nrow(cellPercentages))
    stop("Data column number must match covariate and cell percentages row number.")
  if(!all.equal(colnames(data), rownames(covariates)) | !all.equal(rownames(covariates), rownames(cellPercentages)))
    stop("Data column names must match covariate and cell percentages row names.")
  
  matchID <- match(rownames(covariates), colnames(data))
  if(any(is.na(matchID)))
    stop("Data column names must match covariate row names.")
  covariates <- covariates[matchID, ]
  matchID <- match(rownames(covariates), colnames(data))
  if(any(is.na(matchID)))
    stop("Data column names must match cell percentage row names.")
  cellPercentages <- cellPercentages[matchID, ]
  
  if(any(is.na(sum(data))) | any(is.na(sum(covariates))) | any(is.na(sum(cellPercentages))))
    stop("Missing values are not allowed when training the predictor.")
  
  ## Model training using PLSR
  predictor <- plsr(model, ncomp=ncomp, data=list(cellPercentages = cellPercentages, covariates=covariates, 
                    data=t(data)), ...)
  
  ## Remove model if not being kept
  if(!keep.model)
    predictor$model <- NULL
  invisible(predictor)
}
ljsinke/wbccPredictor documentation built on May 15, 2019, 12:07 a.m.