R/slinky_scoring.R

Defines functions .zs

Documented in .zs

#' diffexp
#'
#' Calculate differential expression scores, subsetting by plate.
#' 
#' @param x An object of class Slinky
#' @param treat  A SummarizedExperiment containing the treated
#'     samples, or the pert_iname of desired perturbagen. See details.
#' @param control  A SummarizedExperiment containing the control
#'     samples, or the pert_iname of desired controls. Default is 'auto'.
#'     See details.
#' @param method  Scoring method to use.  Only \code{cd} and
#'     \code{ks} are presently supported.
#' @param split_by_plate  Should the analysis be split by plate?
#'     This is one way to control for batch effects, but requires at
#'     least two treated sample and two control samples on each plate in
#'     the dataset. Default is FALSE. Not supported for method = 'ks'.
#' @param where_clause  If treat is a pert_iname, further query
#'     terms may be specified here (e.g. \code{pert_type=\"trt_sh\"}).
#' @param gold  Restrict analysis to gold instances as defined by
#'     LINCS. Ignored if treat and control are SummarizedExperiments.
#' @param inferred  Should the inferred (non-landmark) genes be
#'     included in the analysis? Default is TRUE.
#' @param verbose  Do you want to know how things are going?
#'     Default is FALSE.
#' @param ...  Additional arguments for \code{method}.
#' @return Vectors of scores, one per subset (plate).
#'     This function looks for \code{rna_plate} in
#'     \code{colData(treat)} and \code{colData(control)} to slice the data
#'     into subsets, and then performs differential expression analysis on
#'     the subsets. If a perturbation
#'     identifier is provided instead of an SummarizedExperiment, the
#'     necessary SummarizedExperiment is constructed by calling this
#'     package's \code{toSummarizedExperiment} function (which requires
#'     that you have
#'     initialized this class with appropriate clue.io key and location of
#'     gctx file). Note that the control dataset can be automatically
#'     generated by the default option of \code{control=\"auto\"}. In this
#'     case, appropriate same-plate controls are identified for the samples
#'     in the treat dataset and loaded. For more complex queries, you can
#'     create the requisite SummarizedExperiments yourself with
#'     \code{toSummarizedExperiment}, or
#'     create a SummarizedExperiment by any other methods, ensuring that
#'     \code{treat} and \code{control} contain the
#'     \code{rna_plate} metadata variable for subsetting.
#'     Note that this function assumes that each plate represented in
#'     \code{treat} is also represented in \code{control}
#' @name diffexp
#' @rdname diffexp
#' @examples 
#' #'
#' # for build/demo only.  You MUST use your own key when using the slinky
#' # package.
#' user_key <- httr::content(httr::GET('https://api.clue.io/temp_api_key'),
#'                           as='parsed')$user_key
#' sl <- Slinky(user_key,
#'                  system.file('extdata', 'demo.gctx',
#'                       package='slinky'),
#'                  system.file('extdata', 'demo_inst_info.txt',
#'                      package = 'slinky'))
#' scores <- diffexp(sl, sl[,1:5], sl[,18:22])
#' head(scores)
#'
setGeneric("diffexp",
           function(x,
                    treat,
                    control = "auto",
                    method = "cd",
                    split_by_plate = FALSE,
                    where_clause = list(),
                    gold = TRUE,
                    inferred = TRUE,
                    verbose = FALSE,
                    ...) {
             standardGeneric("diffexp")
           })
#' @examples
#' # for build/demo only.  You MUST use your own key when using the slinky
#' # package.
#' \dontrun{
#' user_key <- httr::content(httr::GET('https://api.clue.io/temp_api_key'),
#'                           as='parsed')$user_key
#' sl <- Slinky(user_key,
#'                  system.file('extdata', 'demo.gctx',
#'                       package='slinky'),
#'                  system.file('extdata', 'demo_inst_info.txt',
#'                      package = 'slinky'))
#'cd_vector <- diffexp(sl, 
#'                     treat = "amoxicillin", 
#'                     split_by_plate = FALSE, 
#'                     verbose = FALSE)
#' }
#' @rdname diffexp
#' @importFrom dplyr %>%
#' @exportMethod diffexp
#' @aliases diffexp,Slinky-method
setMethod("diffexp", signature(x = "Slinky"),
          function(x,
                   treat,
                   control = "auto",
                   method = "cd",
                   split_by_plate = FALSE,
                   where_clause = list(),
                   gold = TRUE,
                   inferred = TRUE,
                   verbose = FALSE,
                   ...)
          {
            if (inferred){
              row.ix <- seq_len(nrow(x))
            } else {
              row.ix <- seq_len(min(nrow(x), 978))
            }
            
            if (is(treat, "character")) {

              where_clause$pert_iname = treat
              
              if (gold) {
                where_clause$is_gold = TRUE
              }
              
              fields <- c("rna_plate", "distil_id")
              if (verbose)
                message("Loading data for 'treat' group.")
              treat <- loadL1K(x[row.ix, ],
                               where_clause = where_clause,
                               fields = fields,
                               inferred = inferred,
                               verbose = verbose)
              if (verbose)
                message(paste0("\nLoaded ", ncol(treat), " treated samples."))
              
            } else if (is(treat, "Slinky")) {
              treat <- as(treat[row.ix, ], "SummarizedExperiment")
            } else if (!is(treat, "SummarizedExperiment")) {
              stop(
                "diffexp expects either the pert_iname of the perturbagen, ",
                "a Slinky object, or a SummarizedExperiment for the ",
                "'treat' dataset"
              )
            }
            if (is(control, "character")) {
              if (control == "auto") {
                if (verbose)
                  message("\nLocating and loading control samples.")
                ids <- controls(x[row.ix, ],
                                treat$distil_id,
                                verbose = verbose)$distil_id
                control <- loadL1K(x[row.ix, which(colnames(x) %in% ids)], 
                          inferred = inferred)
                if (verbose)
                  message(paste0("\nLoaded ",
                                 ncol(control),
                                 " control samples."))
              } else  {
                where_clause <- list(pert_iname = control)
                if (gold)
                  where_clause$is_gold = TRUE
                fields <- c("rna_plate", "distil_id")
                
                if (verbose)
                  message("Loading data for 'control' group.")
                control <- loadL1K(x[row.ix, ],
                                   where_clause = where_clause,
                                   fields = fields,
                                   inferred = inferred,
                                   verbose = verbose)
                
                if (verbose)
                  message(paste0("Loaded ", ncol(control), " control samples."))
              }
            } else if (is(control, "Slinky")) {
              control <- as(control[row.ix, ], "SummarizedExperiment")
            } else if (!is(control, "SummarizedExperiment")) {
              stop(
                "diffexp expects either the pert_iname of the perturbagen, ",
                "a Slinky object, or a SummarizedExperiment for the ",
                "'treat' dataset"
              )
            }
            
            if (method == "cd") {
              if (verbose)
                message("Calculating CD scores.")
              if (split_by_plate) {
                rna_plate <- NULL # prevent no visible binding on R CMD CHECK
                . <- NULL # prevent no visible binding on R CMD CHECK
                cds <- as.data.frame(SummarizedExperiment::colData(treat[row.ix, ])) %>%
                  dplyr::group_by(rna_plate) %>%
                  dplyr::do(cd =
                    chDir(x, treat[row.ix, which(treat$rna_plate %in% .$rna_plate)],
                    control[row.ix , which(control$rna_plate %in% .$rna_plate)]))
                # flatten structure to matrix
                cds <- do.call(cbind, cds$cd) %>% `colnames<-`(cds$rna_plate)
              } else {
                cds = chDir(x, treat[row.ix, ], control[row.ix, ])
              }
              return(cds)
            } else if (method == "ks") {
              if (verbose)
                message("Calculating KS scores.")
              if (split_by_plate) {
                warning("Method ks does not support split_by_plate option. ",
                        "Ignoring. ")
              }
              zs <- rzs(x, treat[row.ix, ], control[row.ix, ])
              return(ks(x, zs))
            } else {
              stop("Only 'cd' and 'ks' are currently supported by diffexp.")
            }
          })

#' rzs
#'
#' Convert each sample in \code{treat} to robust zscore.
#' 
#' @param x An object of class Slinky
#' @param treat  A SummarizedExperiment containing the
#'     treated samples,
#'     or the pert_iname of desired perturbagen. See details.
#' @param control  An SummarizedExperiment containing the control
#'     samples, or the pert_iname of desired controls. Default is 'auto'.
#'     See details.
#' @param where_clause  If treat is a pert_iname, further query
#'     terms may be specified here (e.g. \code{pert_type=\"trt_sh\"}).
#' @param gold  Restrict analysis to gold instances as defined by
#'     LINCS. Ignored if treat and control are SummarizedExperiments.
#' @param inferred  Should the inferred (non-landmark) genes be
#'     included in the analysis? Default is TRUE.
#' @param byplate  Do you want to split the scores by plate? This is 
#'     usually wise, unless you have already subsetted \code{treat} and 
#'     \code{control} samples in such a way that plate can safely be ignored, 
#'     or if \code{treat} and \code{control} must come from different plates 
#'     for some reason. Default is TRUE.
#' @param verbose  Do you want to know how things are going?
#'     Default is FALSE.
#' @param ...  Additional arguments for \code{method}.
#' @return Matrix of zscore of same dimension as
#'     \code{treat} (or the expression matrix resulting from querying
#'     for \code{treat} if a pert_iname is specified).
#'     This function identifes same-plate controls for
#'     each treated sample, then converts each treated sample to robust
#'     z-score by subtracting the median control values and dividing by
#'     the (scaled) median absolute deviations.
#' @name rzs
#' @rdname rzs
setGeneric("rzs",
           function(x,
                    treat,
                    control = "auto",
                    where_clause = list(),
                    gold = TRUE,
                    inferred = TRUE,
                    byplate = TRUE,
                    verbose = FALSE,
                    ...) {
             standardGeneric("rzs")
           })
#' @rdname rzs
#' @importFrom dplyr %>%
#' @exportMethod rzs
#' @aliases rzs,Slinky-method
#' @examples 
#' #'
#' # for build/demo only.  You MUST use your own key when using the slinky
#' # package.
#' user_key <- httr::content(httr::GET('https://api.clue.io/temp_api_key'),
#'                           as='parsed')$user_key
#' sl <- Slinky(user_key,
#'                  system.file('extdata', 'demo.gctx',
#'                       package='slinky'),
#'                  system.file('extdata', 'demo_inst_info.txt',
#'                      package = 'slinky'))
#' scores <- rzs(sl, "amoxicillin")
#' head(scores)
setMethod("rzs", signature(x = "Slinky"),
          function(x,
                   treat,
                   control = "auto",
                   where_clause = list(),
                   gold = TRUE,
                   inferred = TRUE,
                   byplate = TRUE,
                   verbose = FALSE,
                   ...)
          {
            if (inferred){
              row.ix <- seq_len(nrow(x))
            } else {
              row.ix <- seq_len(min(nrow(x), 978))
            }
            
            if (is(treat, "character")) {
              where_clause$pert_iname = treat
              
              if (gold) {
                where_clause$is_gold = TRUE
              }
              
              fields <- c("rna_plate", "distil_id")
              if (verbose)
                message("Loading data for 'treat' group.")
              tryCatch({
                treat <- loadL1K(x[row.ix,],
                                 where_clause = where_clause,
                                 fields = fields,
                                 verbose = verbose)
                },
                error = function(e) {
                  stop(e)
                },
                warning = function(w) {
                  if(grepl("no results", w$message)) {
                    stop(paste0("No treated instances found for ", treat, " and is_gold=", gold))
                  } else {
                    stop(paste0("Unexpected error when loading treated instances: ", w$message))
                  }
                }
              )
              if (verbose)
                message(paste0("\nLoaded ", ncol(treat), " treated samples."))
              
            } else if (is(treat,  "Slinky")) {
              treat <- as(treat[row.ix, ], "SummarizedExperiment")
            } else if (!is(treat, "SummarizedExperiment")) {
              stop(
                "rzs expects either the pert_iname of the perturbagen ",
                "or a SummarizedExperiment Set for the 'treat' dataset"
              )
            }

            if (is(control, "character")) {
              if (control == "auto") {
                if (verbose)
                  message("\nLocating and loading control samples.")

                ids <- controls(x,
                                treat$distil_id,
                                verbose = verbose)$distil_id
                
                  control <- loadL1K(x[row.ix, 
                                       which(colnames(x) %in% ids)])
                if (verbose)
                  message(paste0("\nLoaded ",
                                 ncol(control),
                                 " control samples."))
              } else  {
                where_clause <- list(pert_iname = control)
                if (gold)
                  where_clause$is_gold = TRUE
                fields <- c("rna_plate", "distil_id")
                
                if (verbose)
                  message("Loading data for 'control' group.")
                
                control <- loadL1K(x[row.ix, ],
                                   where_clause = where_clause,
                                   fields = fields,
                                   verbose = verbose)
                if (verbose)
                  message(paste0("Loaded ", ncol(control), 
                                 " control samples."))
              }
            } else if (is(control, "Slinky")) {
              control <- as(control[row.ix, ], "SummarizedExperiment")
            } else if (!is(control, "SummarizedExperiment")) {
              stop(
                "rzs expects either 'auto', the pert_iname of the perturbagen or",
                "a SummarizedExperiment for the 'control' dataset"
              )
            }
            rna_plate <- NULL # prevent no visible binding on R CMD CHECK
            . <- NULL # prevent no visible binding on R CMD CHECK
            if (byplate) {
              zs <- as.data.frame(SummarizedExperiment::colData(treat)) %>%
                dplyr::group_by(rna_plate) %>%
                dplyr::do(z = 
                            .zs(assays(treat)[[1]][, which(treat$rna_plate %in% .$rna_plate)],
                                assays(control)[[1]][, which(control$rna_plate %in% .$rna_plate)])
                )
              # flatten structure to matrix
              zs <- do.call(cbind, zs$z) %>% `colnames<-`(base::colnames(treat))
            } else {
              zs <- .zs(assays(treat)[[1]], assays(control)[[1]])
            }
            zs
          })


#' .zs
#'
#' The .zs function provides method for calculating robust z-scores
#' @param  treat a matrix of values for the treated samples
#' @param  control a matrix of values for the control samples
#' @return A vector of z-scores.
#'
#' @name .zs
#' @importFrom dplyr %>%
#' @rdname slinky-internal
.zs <- function(treat, control) {
  "Internal function for converting expression values to robust  z-scores"
  meanad <- function(x) {
    mean(abs(x - mean(x)) * 1.253314)
  }
  
  medians <- apply(as.matrix(control), 1, median)
  mads <- apply(as.matrix(control), 1, mad)
  meanads <- apply(as.matrix(control), 1, meanad)
  ix <- which(mads == 0)
  mads[ix] <- meanads[ix]
  
  apply(as.matrix(treat), 2, function(x) {
    (x - medians) / mads
  })
}
VanAndelInstitute/slinky documentation built on Aug. 20, 2021, 1:05 p.m.