R/evalDetection.R

Defines functions evalDetection

Documented in evalDetection

#' @rdname evalDetection
#'
#' @title Evaluate detection performance of a signal detector
#' @description For a given cutpoint (e.g., previously estimated with the
#'     function estimateCutPoint), 'evalDetection' will return the evaluation of
#'     the methylation signal into two clases: signal from control and signal
#'     from treatment samples.
#' @details  The regulatory methylation signal is also an output from a natural
#'     process that continuously takes place across the ontogenetic development
#'     of the organisms. So, we expect to see methylation signal under natural, 
#'     ordinary conditions. Here, to evaluate the performance of signal
#'     classification obtained with the application of some classifier/detector
#'     or rule, the cross-tabulation of observed and predicted classes with
#'     associated statistics are calculated with function
#'     \code{\link[caret]{confusionMatrix}} fron package "caret".
#'
#'     A classification result with low accuracy and compromising values from
#'     other classification performance indicators (see below) suggest that the
#'     treatment does not induce a significant regulatory signal different
#'     from control.
#'
#' @param LR A list of GRanges objects (LR) including control and treatment
#'     GRanges containing divergence values for each cytosine site in the
#'     meta-column. LR can be generated, for example, by the function
#'     \code{\link[MethylIT]{estimateDivergence}}. Each GRanges object must
#'     correspond to a sample. For example, if a sample is named 's1', then
#'     this sample can be accessed in the list of GRanges objects as LR$s1.
#' @param control.names Names/IDs of the control samples, which must be include
#'     in the variable LR.
#' @param treatment.names Names/IDs of the treatment samples, which must be
#'     included in the variable LR.
#' @param cutpoint Cutpoint to select DIMPs. Cytosine positions with divergence
#'     greater than 'cutpoint' will selected as DIMPs. Cutpoints are estimated
#'     with the function 'estimateCutPoint'. Cytosine positions with divergence
#'     values greater than the cutpoint are considered members of the
#'     "positive class".
#' @param div.col Column number for divergence variable used in the ROC
#'     analysis and estimation of the cutpoints.
#' @param seed Random seed used for random number generation.
#' @param verbose if TRUE, prints the function log to stdout
#'
#' @return the list with the statisitics returned by the function
#'     \code{\link[caret]{confusionMatrix}} fron package "caret".
#'
#' @examples
#' set.seed(123) #'#' To set a seed for random number generation
#' #'#' GRanges object of the reference with methylation levels in
#' #'#' its matacolumn
#' num.points <- 5000
#' Ref <- makeGRangesFromDataFrame(
#'   data.frame(chr = '1',
#'              start = 1:num.points,
#'              end = 1:num.points,
#'              strand = '*',
#'              p1 = rbeta(num.points, shape1 = 1, shape2 = 1.5)),
#'   keep.extra.columns = TRUE)
#'
#' #'#' List of Granges objects of individuals methylation levels
#' Indiv <- GRangesList(
#'   sample11 = makeGRangesFromDataFrame(
#'     data.frame(chr = '1',
#'                start = 1:num.points,
#'                end = 1:num.points,
#'                strand = '*',
#'                p2 = rbeta(num.points, shape1 = 1.5, shape2 = 2)),
#'     keep.extra.columns = TRUE),
#'   sample12 = makeGRangesFromDataFrame(
#'     data.frame(chr = '1',
#'                start = 1:num.points,
#'                end = 1:num.points,
#'                strand = '*',
#'                p2 = rbeta(num.points, shape1 = 1.6, shape2 = 2)),
#'     keep.extra.columns = TRUE),
#'   sample21 = makeGRangesFromDataFrame(
#'     data.frame(chr = '1',
#'                start = 1:num.points,
#'                end = 1:num.points,
#'                strand = '*',
#'                p2 = rbeta(num.points, shape1 = 40, shape2 = 4)),
#'     keep.extra.columns = TRUE),
#'   sample22 = makeGRangesFromDataFrame(
#'     data.frame(chr = '1',
#'                start = 1:num.points,
#'                end = 1:num.points,
#'                strand = '*',
#'                p2 = rbeta(num.points, shape1 = 41, shape2 = 4)),
#'     keep.extra.columns = TRUE))
#' #'#' To estimate Hellinger divergence using only the methylation levels.
#' HD <- estimateDivergence(ref = Ref, indiv = Indiv, meth.level = TRUE,
#'                          columns = 1)
#' res <- evalDetection(LR  = HD, control.names = c("sample11", "sample12"),
#'                      treatment.names = c("sample21", "sample22"),
#'                      cutpoint = 0.85, div.col = 3L, seed=1234, verbose=TRUE)
#' @importFrom GenomicRanges GRanges GRangesList
#' @importFrom caret confusionMatrix
#' @export
evalDetection <- function(LR, control.names, treatment.names, cutpoint,
                          div.col=7L, seed=1234, verbose=TRUE) {

  set.seed(seed)
  ## ======== data preprocessing ======== ##
  sn <- names(LR)
  idx.ct <- match(control.names, sn)
  idx.tt <- match(treatment.names, sn)
  CT <- GRangesList(LR[idx.ct])
  CT <- unlist(CT)
  TT <- GRangesList(LR[idx.tt])
  TT <- unlist(TT)

  CntrlClass <- rep( "CT", length(CT))
  PredCntrlClass <- CntrlClass
  PredCntrlClass[abs(mcols(CT[, div.col])[, 1]) > cutpoint ] <- "TT"

  TreatClass <- rep( "TT", length(TT))
  PredTreatClass <- TreatClass
  PredTreatClass[abs(mcols(TT[, div.col])[, 1]) < cutpoint ] <- "CT"

  conf.m = table(c(PredCntrlClass, PredTreatClass), c(CntrlClass, TreatClass))
  res  <- confusionMatrix(data = conf.m, positive = "TT")

  return(res)
}
genomaths/MethylIT.utils documentation built on Nov. 26, 2019, 2:10 a.m.