R/mahRain.R

Defines functions .classwiseMahalanobisRain .mahDist

Documented in .classwiseMahalanobisRain .mahDist

#' @include global.R
#' @include themes.R
#' @import methods
#' @import stats
NULL

#' Mahalanobis distance of droplets from a distribution.
#'
#' Find the Mahalanobis distance of all droplets from a given distribution.
#'
#' @param droplets A data frame of droplets with \code{Ch1.Amplitude} and
#' \code{Ch2.Amplitude} columns.
#' @param clusStats A list of statistics for a cluster generated by
#' \code{\link{classStats}}. This should have a \code{mean}, \code{cov} and
#' \code{cov.inv} values.
#'
#' @return A vector of numbers, where each one corresponds to the distance of
#' that droplet from the given distribution.
#'
#' @author Anthony Chiu, \email{anthony.chiu@cruk.manchester.ac.uk}

.mahDist <- function(droplets, clusStats) {
  if(is.null(clusStats$cov.inv)) {
    return(0)
  } else {
    n <- seq_along(nrow(droplets))
    x <- split(droplets[, c("Ch1.Amplitude", "Ch2.Amplitude")], n)
    mapply(mahalanobis, x,
           MoreArgs=list(center=clusStats$mean, cov=clusStats$cov.inv,
                         inverted=TRUE))
  }
}

#' Fuzzy clusters by bivariate normal distributions.
#'
#' Assume that the class \code{cl} is bivariate normally distributed. This
#' method adds fuzziness to the class. Other classes are not changed.
#'
#' @param droplets A data frame of droplets with \code{Ch1.Amplitude} and
#' \code{Ch2.Amplitude} columns, as well as a class column (see the parameter
#' \code{classCol}).
#' @param cl The class to focus on. This should be either "NN", "NP", "PN" or
#' "PP".
#' @param maxDistance An integer corresponding to the maximum Mahalanobis
#' distance for which we will consider points to be members of the class, i.e.
#' this is the level outside of which we consider droplets to be too far from
#' the cluster.
#' @param classCol The column (name or number) from \code{droplets}
#' representing the class.
#'
#' @return A factor corresponding to the class column with \code{Rain} entries
#' added for the class \code{cl}.
#'
#' @author Anthony Chiu, \email{anthony.chiu@cruk.manchester.ac.uk}

.classwiseMahalanobisRain <- function(droplets, cl, maxDistance=30,
                                      classCol="class") {
  clusStats <- classStats(droplets, classCol=classCol)[[cl]]

  # Retain the droplets within the chosen maxDistance of the mean of its class.
  droplets[droplets[, classCol] == cl, classCol] <-
    ifelse(
      .mahDist(droplets[droplets[, classCol] == cl, ],
               clusStats=clusStats) <= maxDistance,
      as.character(droplets[droplets[, classCol] == cl, classCol]),
      ddpcr$rain
    )

  factor(droplets[, classCol], levels=ddpcr$classesRain)
}


#' Define 'rain' (unclassified) droplets by fitting the clusters to
#' bivariate normal distributions.
#'
#' Assume that each of the classified clusters are bivariate normally
#' distributed. We add fuzziness to the classifications by assigning droplets
#' far away from the centres as "Rain". We use the Mahalanobis distance for
#' each cluster to determine whether a droplet is 'too far away'.
#'
#' @param droplets A \code{ddpcrWell} or \code{ddpcrPlate} object, or
#' a data frame of droplets with "Ch1.Amplitude" and "Ch2.Amplitude" columns,
#' as well as a class column.
#' @param cMethod The name or column number of the classification for which we
#' want to add rain to.
#' @param maxDistances A list of (levels) with keys in \code{c("NN", "PN",
#' "NP", "PP")}. If the list is empty or set as \code{NULL}, no rain is added.
#' If set as a single integer \code{n}, this value is taken for all classes,
#' i.e. \code{list("NN"=n, "PN"=n, "NP"=n, "PP"=n)}.
#' @param ... Other options depending on the type of \code{droplets}.
#'
#' @return An object where the specified class has "Rain" entries added.
#'
#' @name mahalanobisRain
#'
#' @author Anthony Chiu, \email{anthony.chiu@cruk.manchester.ac.uk}
#'
#' @aliases multivariateRain multivariateNormalRain mvNormalRain mvnRain
#'
#' @examples
#' ## Take a data frame of droplets of transform it into the rigth format.
#' droplets <- KRASdata[["E03"]]
#' droplets$Cluster <- relabelClasses(droplets, classCol="Cluster")
#'
#' ## Add rain as a new column.
#' droplets$ClusterMahRain <-
#'     mahalanobisRain(droplets, cMethod="Cluster", fullTable=FALSE)
#' table(droplets$ClusterMahRain)
#'
#' ## The maximum distance around each mean can be changed uniformly.
#' droplets$ClusterMahRain <-
#'     mahalanobisRain(droplets, cMethod="Cluster", maxDistances=35,
#'                     fullTable=FALSE)
#' table(droplets$ClusterMahRain)
#'
#' ## Or we can change the maximum distances for each individual cluster.
#' droplets$ClusterMahRain <-
#'     mahalanobisRain(droplets, cMethod="Cluster",
#'                     maxDistances=list(NN=35, NP=30, PN=30, PP=30),
#'                     fullTable=FALSE)
#' table(droplets$ClusterMahRain)
#'
#' # This method works the same for ddpcrWell objects.
#' aWell <- ddpcrWell(well=KRASdata[["E03"]])
#' aWell <- mahalanobisRain(aWell, cMethod="Cluster")
#' table(wellClassification(aWell, cMethod="ClusterMahRain"))
#'
#' # Likewise for ddpcrPlate objects.
#' krasPlate <- ddpcrPlate(wells=KRASdata[c("E03", "H03", "C04", "F04")])
#' krasPlate <- mahalanobisRain(krasPlate, cMethod="Cluster")
#' lapply(plateClassification(krasPlate, cMethod="ClusterMahRain"), table)
#'
#' @export

setGeneric(
  "mahalanobisRain", function(droplets, cMethod, maxDistances=30,...) {
    standardGeneric("mahalanobisRain")
})


#' @rdname mahalanobisRain
#'
#' @param fullTable If \code{TRUE}, returns a full data frame of droplets and
#' their classification; if \code{FALSE}, simply returns a factor corresponding
#' to this classification. Defaults to \code{TRUE}.
#'
#' @exportMethod mahalanobisRain

setMethod("mahalanobisRain", "data.frame",
  function(droplets, cMethod, maxDistances=30, fullTable=TRUE) {
    # maxDistances is just a number---assume that all of the classes
    # have the same maximum distances
    if(is.numeric(maxDistances) && length(maxDistances) == 1) {
      n <- length(ddpcr$classes)
      maxDistances <- setNames(rep(maxDistances, n), ddpcr$classes)
    } else if(is.null(maxDistances)) {
      # Assume that an empty vector means no rain.
      maxDistances <- list()
    } else if(!is.list(maxDistances)) {
      # Not a list and none of the conditions above---do not know how to
      # handle.
      stop("'maxDistances' should be a named list with names in c(",
           ddpcr$nn, ", ", ddpcr$np, ", ", ddpcr$pn, ", ",
           ddpcr$pp, ")")
    }

    # Loop through the maxDistances and check where droplets lie.
    for(className in names(maxDistances)) {
      droplets[, cMethod] <-
        .classwiseMahalanobisRain(droplets, className,
                                  maxDistances[[className]],
                                  classCol=cMethod)

    }

    if(fullTable) {
      droplets[, c("Ch1.Amplitude", "Ch2.Amplitude", cMethod)]
    } else {
      droplets[, cMethod]
    }
  }
)


#' @rdname mahalanobisRain
#'
#' @exportMethod mahalanobisRain

setMethod("mahalanobisRain", "ddpcrWell",
  function(droplets, cMethod, maxDistances=30) {
    cl <- wellClassification(droplets, cMethod=cMethod, withAmplitudes=TRUE)
    cl <- mahalanobisRain(cl, cMethod=cMethod, maxDistances=maxDistances,
                          fullTable=FALSE)
    wellClassification(droplets, paste0(cMethod, "MahRain")) <- cl
    droplets
  }
)


#' @rdname mahalanobisRain
#'
#' @exportMethod mahalanobisRain

setMethod("mahalanobisRain", "ddpcrPlate",
  function(droplets, cMethod, maxDistances=30) {
    cl <- plateClassification(droplets, cMethod=cMethod, withAmplitudes=TRUE)
    cl <- do.call(rbind, cl)
    rainy <- mahalanobisRain(cl, cMethod=cMethod, maxDistances=maxDistances,
                             fullTable=FALSE)
    plateClassification(droplets, paste0(cMethod, "MahRain")) <- rainy
    droplets
  }
)

Try the twoddpcr package in your browser

Any scripts or data that you put into this service are public.

twoddpcr documentation built on Nov. 8, 2020, 5:49 p.m.