R/AllClasses.R

Defines functions PreprocConfig NucseqPreprocConfig SS2PreprocConfig DefaultPreprocConfig .valid_complexity_cutoff .valid_index is_config_object QCResults is_qc_results_object ScandalDataSet scat is_scandal_object is_valid_assay NODE_ID PROJ_ID DEFAULT_CELL_2_NODE_MAP ScandalMetaprograms

Documented in DefaultPreprocConfig NucseqPreprocConfig PreprocConfig QCResults ScandalDataSet SS2PreprocConfig

#' @title S4 classes definition
#' @details Includes the generics definition file
#' @include AllGenerics.R
NULL # Do not remove me!!!

### =========================================================================
### PreprocConfig objects (start)
### -------------------------------------------------------------------------
###

setClassUnion("ListOrVector", c("list", "vector"))
setClassUnion("NumericOrVector", c("numeric", "vector"))

#'
#' @title PreprocConfig class
#'
#' @description An S4 class for storing preprocessing configuration parameters.
#'
#' @slot complexityCutoff A numeric vector of length 2 representing the lower and
#' upper bounds of complexity (i.e. the number of detected genes per cell).
#' @slot expressionCutoff A numeric representing the minimal log2 mean expression
#' per gene below which a gene is considered lowly expressed.
#' @slot housekeepingCutoff A numeric representing the log2 mean expression of
#' house-keeping genes (i.e. genes that are highly expressed in all cells) per
#' cell below which a cell is considered low quality.
#' @slot logBase A numeric representing the logarithm base for performing log
#' transformation on the data.
#' @slot scalingFactor A numeric representing a scaling factor by which to divide
#' each data point before log transformation.
#' @slot pseudoCount A numeric representing the pseudo count added when performing
#' log transformation to avoid taking the log of zero.
#' @slot typeMatrix A logical indicating if the dataset should be represented using
#' the S4 Matrix class (instead of base R matrix) to reduce memory overhead using
#' sparse matrix representation.
#'
#' @section Methods:
#' \describe{
#'   \item{\code{complexityCutoff}}{Getter/setter for the complexity cutoff}
#'   \item{\code{expressionCutoff}}{Getter/setter for the expression cutoff}
#'   \item{\code{housekeepingCutoff}}{Getter/setter for the housekeeping cutoff}
#'   \item{\code{logBase}}{Getter/setter for the log base}
#'   \item{\code{scalingFactor}}{Getter/setter for the scaling factor}
#'   \item{\code{pseudoCount}}{Getter/setter for the pseudo count}
#'   \item{\code{typeMatrix}}{Getter/setter for the Matrix type}
#' }
#'
#' @examples
#' pc <- PreprocConfig(complexityCutoff = c(0, 10000),
#'                     expressionCutoff = 5,
#'                     housekeepingCutoff = 7,
#'                     logBase = 2,
#'                     scalingFactor = 10,
#'                     pseudoCount = 1,
#'                     typeMatrix = TRUE)
#'
#' logBase(pc) # Equals 2
#' logBase(pc) <- 10
#' logBase(pc) # Equals 10
#'
#' @aliases PreprocConfig
#'
#' @author Avishay Spitzer
#'
#' @export
setClass("PreprocConfig",
         slots = c(complexityCutoff = "ListOrVector",
                   expressionCutoff = "NumericOrVector",
                   housekeepingCutoff = "NumericOrVector",
                   logBase = "numeric",
                   scalingFactor = "numeric",
                   pseudoCount = "numeric",
                   typeMatrix = "logical"))

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Constructors
###

#'
#' @param complexityCutoff A numeric vector of length 2 representing the lower and
#' upper bounds of complexity (i.e. the number of detected genes per cell).
#' @param expressionCutoff A numeric representing the minimal log2 mean expression
#' per gene below which a gene is considered lowly expressed.
#' @param housekeepingCutoff A numeric representing the log2 mean expression of
#' house-keeping genes (i.e. genes that are highly expressed in all cells) per
#' cell below which a cell is considered low quality.
#' @param logBase A numeric representing the logarithm base for performing log
#' transformation on the data.
#' @param scalingFactor A numeric representing a scaling factor by which to divide
#' each data point before log transformation.
#' @param pseudoCount A numeric representing the pseudo count added when performing
#' log transformation to avoid taking the log of zero.
#' @param typeMatrix A logical indicating if the dataset should be represented using
#' the S4 Matrix class (instead of base R matrix) to reduce memory overhead using
#' sparse matrix representation.
#'
#' @describeIn PreprocConfig-class Constructs a new \code{PreprocConfig} object.
#'
#' @importFrom methods new
#'
#' @export
PreprocConfig <- function(complexityCutoff, expressionCutoff, housekeepingCutoff, logBase, scalingFactor, pseudoCount, typeMatrix) {
  cp <- new("PreprocConfig",
            complexityCutoff = complexityCutoff,
            expressionCutoff = expressionCutoff,
            housekeepingCutoff = housekeepingCutoff,
            logBase = logBase,
            scalingFactor = scalingFactor,
            pseudoCount = pseudoCount,
            typeMatrix = typeMatrix)

  return (cp)
}

#'
#' @describeIn PreprocConfig-class constructs default preprocessing configuration for data
#' generated from frozen samples using SmartSeq2 protocol (single-nuclei sequencing).
#'
#' @export
NucseqPreprocConfig <- function() {
  return (PreprocConfig(complexityCutoff = c(2000, 6000), expressionCutoff = 5, housekeepingCutoff = 7, logBase = 2, scalingFactor = 10, pseudoCount = 1, typeMatrix = TRUE))
}

#'
#' @describeIn PreprocConfig-class constructs default preprocessing configuration for data
#' generated from fresh samples using SmartSeq2 protocol.
#'
#' @export
SS2PreprocConfig <- function() {
  return (PreprocConfig(complexityCutoff = c(3000, 8000), expressionCutoff = 4, housekeepingCutoff = 7, logBase = 2, scalingFactor = 10, pseudoCount = 1, typeMatrix = TRUE))
}

#'
#' @describeIn PreprocConfig-class constructs default preprocessing configuration
#' (currently trhe selected default configuration is Nucseq).
#'
#' @export
DefaultPreprocConfig <- function() {
  return (NucseqPreprocConfig())
}

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Validity
###

.valid_complexity_cutoff <- function(x) {
  if (!is.numeric(x))
    return (sprintf("complexity cutoff must be of numeric type"))
  if (length(x) != 2)
    return (sprintf("complexity cutoff must be a numeric vector of length equals to 2"))
  if (x[1] < 0)
    return (sprintf("lower bound of complexity cutoff must be greater than or equal to 0"))
  if (x[2] <= x[1])
    return (sprintf("upper bound of complexity cutoff must be greater than lower bound"))

  return (NULL)
}

setValidity("PreprocConfig", function(object) {

  if (is.list(object@complexityCutoff)) {
    for (co in object@complexityCutoff) {
      res <- .valid_complexity_cutoff(co)

      if (!is.null(res))
        return (res)
    }
  } else {
    res <- .valid_complexity_cutoff(object@complexityCutoff)

    if (!is.null(res))
      return (res)
  }

  if (length(object@expressionCutoff) > 1){
    if (is.null(names(object@expressionCutoff)))
      return (sprintf("expression cutoff vector must be a named vector"))
  }

  if (any(object@expressionCutoff <= 0))
    return (sprintf("expression cutoff must be greater than or equal to 0"))

  if (length(object@housekeepingCutoff) > 1){
    if (is.null(names(object@housekeepingCutoff)))
      return (sprintf("housekeeping cutoff vector must be a named vector"))
  }

  if (any(object@housekeepingCutoff <= 0))
    return (sprintf("housekeeping cutoff must be greater than or equal to 0"))

  if (object@logBase <= 0)
    return (sprintf("log base must be greater than 0"))

  if (object@scalingFactor <= 0)
    return (sprintf("scaling factor must be greater than 0"))

  if (object@pseudoCount <= 0)
    return (sprintf("Pseudo count must be greater than 0"))

  return(TRUE)
})

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Getters and setters.
###

#'
#' @rdname PreprocConfig-class
#'
#' @export
setMethod("complexityCutoff", "PreprocConfig", function(x, sid = NULL) {
  if (is.list(x@complexityCutoff) & !is.null(sid))
    return (x@complexityCutoff[[sid]])

  return(x@complexityCutoff)
})

#'
#' @param x a \code{PreprocConfig} object.
#' @param value a value to replace the currently set value.
#'
#' @rdname PreprocConfig-class
#'
#' @export
setReplaceMethod("complexityCutoff", "PreprocConfig", function(x, value) {
  x@complexityCutoff <- value
  return(x)
})

#'
#' @rdname PreprocConfig-class
#'
#' @export
setMethod("expressionCutoff", "PreprocConfig", function(x, sid = NULL) {
  if (length(x@expressionCutoff) > 1 & !is.null(sid))
    return (x@expressionCutoff[sid])

  return(x@expressionCutoff)
})

#'
#' @rdname PreprocConfig-class
#'
#' @export
setReplaceMethod("expressionCutoff", "PreprocConfig", function(x, value) {
  x@expressionCutoff <- value
  return(x)
})

#'
#' @rdname PreprocConfig-class
#'
#' @export
setMethod("housekeepingCutoff", "PreprocConfig", function(x, sid = NULL) {
  if (length(x@housekeepingCutoff) > 1 & !is.null(sid))
    return (x@housekeepingCutoff[sid])

  return(x@housekeepingCutoff)
})

#'
#' @rdname PreprocConfig-class
#'
#' @export
setReplaceMethod("housekeepingCutoff", "PreprocConfig", function(x, value) {
  x@housekeepingCutoff <- value
  return(x)
})

#'
#' @rdname PreprocConfig-class
#'
#' @export
setMethod("logBase", "PreprocConfig", function(x) {
  return(x@logBase)
})

#'
#' @rdname PreprocConfig-class
#'
#' @export
setReplaceMethod("logBase", "PreprocConfig", function(x, value) {
  x@logBase <- value
  return(x)
})

#'
#' @rdname PreprocConfig-class
#'
#' @export
setMethod("scalingFactor", "PreprocConfig", function(x) {
  return(x@scalingFactor)
})

#'
#' @rdname PreprocConfig-class
#'
#' @export
setReplaceMethod("scalingFactor", "PreprocConfig", function(x, value) {
  x@scalingFactor <- value
  return(x)
})

#'
#' @rdname PreprocConfig-class
#'
#' @export
setMethod("pseudoCount", "PreprocConfig", function(x) {
  return(x@pseudoCount)
})

#'
#' @rdname PreprocConfig-class
#'
#' @export
setReplaceMethod("pseudoCount", "PreprocConfig", function(x, value) {
  x@pseudoCount <- value
  return(x)
})

#'
#' @rdname PreprocConfig-class
#'
#' @export
setMethod("typeMatrix", "PreprocConfig", function(x) {
  return(x@typeMatrix)
})

#'
#' @rdname PreprocConfig-class
#'
#' @export
setReplaceMethod("typeMatrix", "PreprocConfig", function(x, value) {
  x@typeMatrix <- value
  return(x)
})

.valid_index <- function(x, i) {

  if (is.character(i))
    return(all(i %in% names(x)))
  else if (is.numeric(i)) {
    return (all(!(is.null(x[i]) | is.na(x[i]))))
  }

  return (FALSE)
}

setMethod("[", "PreprocConfig", function(x, i, ...) {

  if (missing(i))
    return(x)

  if (is.list(x@complexityCutoff)) {

    stopifnot(.valid_index(x@complexityCutoff, i))

    if (length(i) > 1)
      x@complexityCutoff <- x@complexityCutoff[i]
    else
      x@complexityCutoff <- x@complexityCutoff[[i]]
  }

  if (length(x@expressionCutoff) > 1) {

    stopifnot(.valid_index(x@expressionCutoff, i))

    x@expressionCutoff <- x@expressionCutoff[i]
  }

  if (length(x@housekeepingCutoff) > 1) {

    stopifnot(.valid_index(x@housekeepingCutoff, i))

    x@housekeepingCutoff <- x@housekeepingCutoff[i]
  }

  return (x)
})

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Display
###

#'
#' @rdname PreprocConfig-class
#'
#' @export
setMethod("show", "PreprocConfig", function(object) {

  if (is.list(complexityCutoff(object))) {

    str <- sprintf("Complexity cutoff:")
    for (n in names(complexityCutoff(object))) {
      co <- complexityCutoff(object)[[n]]
      str <- paste0(str, sprintf(" %s=(%d, %d)", n, co[1], co[2]))
    }

    cat(paste0(str, "\n"))

  } else
    cat(sprintf("Complexity cutoff: (%d, %d)\n", complexityCutoff(object)[1], complexityCutoff(object)[2]))

  if (length(expressionCutoff(object)) == 1)
    cat("Expression cutoff:", expressionCutoff(object), "\n")
  else {

    str <- sprintf("Expression cutoff:")
    for (n in names(expressionCutoff(object))) {
      co <- expressionCutoff(object)[n]
      str <- paste0(str, sprintf(" %s=%.2f", n, co))
    }

    cat(paste0(str, "\n"))
  }

  if (length(housekeepingCutoff(object)) == 1)
    cat("Housekeeping cutoff:", housekeepingCutoff(object), "\n")
  else {

    str <- sprintf("Housekeeping cutoff:")
    for (n in names(housekeepingCutoff(object))) {
      co <- housekeepingCutoff(object)[n]
      str <- paste0(str, sprintf(" %s=%.2f", n, co))
    }

    cat(paste0(str, "\n"))
  }

  cat("Log base:", logBase(object), "\n")
  cat("Scaling factor:", scalingFactor(object), "\n")
  cat("Pseudo count:", pseudoCount(object), "\n")
  cat("Matrix type:", typeMatrix(object), "\n")
})

#' @importFrom methods is
is_config_object <- function(object) { return (!is.null(object) & is(object, "PreprocConfig")) }

### -------------------------------------------------------------------------
### PreprocConfig objects (end)
### =========================================================================
###

### =========================================================================
### QCResults objects (start)
### -------------------------------------------------------------------------
###

#'
#' @title QCResults class
#'
#' @description An S4 class for storing quality control results.
#'
#' @slot preprocConfig A object of class \linkS4class{PreprocConfig}.
#' @slot cellIDs A character vector of IDs of cells that passed QC.
#' @slot geneIDs A character vector of IDs of genes that passe QC.
#' @slot statsQC A \linkS4class{DataFrame} containing statistics gathered through
#' quality control process.
#'
#' @section Methods:
#' \describe{
#'   \item{\code{preprocConfig}}{Getter for the \linkS4class{PreprocConfig} object
#'   used when creating the quality control result.}
#'   \item{\code{cellIDs}}{Getter for the cell IDs that passed QC.}
#'   \item{\code{geneIDs}}{Getter for the gene IDs that passed QC}
#'   \item{\code{statsQC}}{Getter for the quality control statistics}
#' }
#'
#' @examples
#'
#' @aliases QCResults
#'
#' @author Avishay Spitzer
#'
#' @export
setClass("QCResults",
         slots = c(preprocConfig = "PreprocConfig",
                   cellIDs = "vector",
                   geneIDs = "vector",
                   statsQC = "DataFrame"))

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Constructors
###

#'
#' @describeIn QCResults-class Constructs a new \code{QCResults} object.
#'
#' @importFrom methods new
#' @importClassesFrom S4Vectors DataFrame
#' @importFrom S4Vectors DataFrame
#'
#' @export
QCResults <- function(preprocConfig, cellIDs = c(), geneIDs = c(), statsQC = DataFrame()) {
  qc <- new("QCResults",
            preprocConfig = preprocConfig,
            cellIDs = cellIDs,
            geneIDs = geneIDs,
            statsQC = statsQC)

  return (qc)
}

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Getters and setters.
###

#'
#' @rdname QCResults-class
#'
#' @export
setMethod("preprocConfig", "QCResults", function(object) {
  return(object@preprocConfig)
})

#'
#' @rdname QCResults-class
#'
#' @export
setMethod("cellIDs", "QCResults", function(object) {
  return(object@cellIDs)
})

#'
#' @rdname QCResults-class
#'
#' @export
setReplaceMethod("cellIDs", "QCResults", function(object, value) {
  object@cellIDs <- value
  return(object)
})

#'
#' @rdname QCResults-class
#'
#' @export
setMethod("geneIDs", "QCResults", function(object) {
  return(object@geneIDs)
})

#'
#' @rdname QCResults-class
#'
#' @export
setReplaceMethod("geneIDs", "QCResults", function(object, value) {
  object@geneIDs <- value
  return(object)
})

#'
#' @rdname QCResults-class
#'
#' @export
setMethod("statsQC", "QCResults", function(object) {
  return(object@statsQC)
})

#'
#' @rdname QCResults-class
#'
#' @export
setReplaceMethod("statsQC", "QCResults", function(object, value) {
  object@statsQC <- value
  return(object)
})

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Display
###

#'
#' @rdname QCResults-class
#'
#' @export
setMethod("show", "QCResults", function(object) {
  cat("cellIDs:", length(cellIDs(object)), "IDs\n")
  cat("geneIDs:", length(geneIDs(object)), "IDs\n")
  show(statsQC(object))
})

#' @importFrom methods is
is_qc_results_object <- function(object) { return (!is.null(object) & is(object, "QCResults")) }

### -------------------------------------------------------------------------
### QCResults objects (end)
### =========================================================================
###

### =========================================================================
### ScandalDataSet objects (start)
### -------------------------------------------------------------------------
###

#' @import Matrix
setClassUnion("MatrixOrNULL", c("Matrix", "matrix", "NULL"))

#'
#' @title ScandalDataSet class
#'
#' @description An S4 class for storing single-cell seqeuncing data, reduced
#' dimensions representations of the data reuqired in the analysis process such as
#' t-SNE and UMAP coordinates and the end-product of the analysis which are the
#' transcriptional programs.
#'
#' @details The S4 class \code{ScandalDataSet} inherits from and extends Bioconductor's
#' base class for single-cell related applications, the \linkS4class{SingleCellExperiment}
#' class.
#' The idea behind \link{scandal} is that in order to detect intra-tumor heterogeneity
#' one needs inspect each tumor individually to collect the different transcriptomic
#' programs that can be found within each tumor and then assess these programs at the
#' level of the entire dataset to define the programs that generalize best
#' (meta-programs).
#' Besides the functionality supplied by its superclasses, \code{ScandalDataSet}
#' supplies methods to keep
#'
#' @slot unprocessedData A read-only matrix that contains the unprocessed data that
#' allows re-accessing this data without the need to read it from file. Sparse matrix
#' representation as well as maintaining a single copy for the entire objects tree
#' decreases the memory overhead of this approach.
#' @slot preprocConfig A configuration object of class \linkS4class{PreprocConfig}.
#' @slot qualityControl A \linkS4class{SimpleList} object containing objects of class
#' \code{QCResults} representing the quality control results, i.e. the cells and
#' genes in each individual node that passed qaulity control and are available for
#' downstream analysis.
#' @slot nodeID A unique character identifier of the constructed \code{ScandalDataSet}
#' object that should represent the specific sample.
#' @slot projectID A character identifier common to all the nodes in the constructed
#' \code{ScandalDataSet} object.
#' @slot cell2SampleMap A **function** that maps a vector of cell IDs to a vector of
#' sample IDs to which the cells belong.
#'
#' @section Constructor:
#' Constructs a \code{ScandalDataSet} object.
#'
#' @section Methods:
#' \describe{
#'   \item{\code{logtpm}}{Getter/setter for the logtpm assay}
#'   \item{\code{unprocessedData}}{Getter for the unprocessedData (read-only)}
#'   \item{\code{preprocConfig}}{Getter for the preprocConfig (read-only)}
#'   \item{\code{qualityControl}}{Getter/Setter for the qualityControl list}
#'   \item{\code{nodeID}}{Getter/setter for the nodeID}
#'   \item{\code{projectID}}{Getter/setter for the projectID}
#'   \item{\code{sampleIDs}}{Returns a character vector containing the IDs of all
#'   samples in the dataset.}
#'   \item{\code{inspectSamples}}{Returns a ScandalDataSet object representing a
#'   set of specific samples.}
#'   \item{\code{cell2SampleMap}}{Getter for the cell2SampleMap function (read-only)}
#' }
#'
#' @seealso \linkS4class{SummarizedExperiment}, \linkS4class{SingleCellExperiment}, \link{scandal_preprocess}
#'
#' @examples
#' # Building a mock dataset with 30 cells and 100 genes
#' ngenes <- 100
#' ncells <- 30
#' dataset <- matrix(sample(0:1e4, ngenes * ncells, replace = FALSE), nrow = ngenes, ncol = ncells)
#' rownames(dataset) <- sapply(seq_len(ngenes), function(x) paste0("GENE", x))
#' colnames(dataset) <- c(sapply(seq_len(ncells / 2), function(x) paste0("TUMOR1-Cell", x)),
#'                        sapply(seq(from = ncells / 2 + 1, to = ncells), function(x) paste0("TUMOR2-Cell", (x - ncells/2))))
#'
#' # Declare a global confguration object for the top-level ScandalDataSet object and
#' # a named list of configuration objects for each single tumor. Note that the names
#' # of the elements in the named list correspond to the names of the tumors that appear
#' # in the column names
#' global_config <- PreprocConfig(complexityCutoff = c(0, 10000), expressionCutoff = 1, housekeepingCutoff = 1, logBase = 2, scalingFactor = 1, pseudoCount = 1, typeMatrix = TRUE)
#' tumor_config <- list(TUMOR1 = PreprocConfig(complexityCutoff = c(0, 10000), expressionCutoff = 1, housekeepingCutoff = 1, logBase = 2, scalingFactor = 1, pseudoCount = 1, typeMatrix = TRUE),
#'                      TUMOR2 = PreprocConfig(complexityCutoff = c(0, 10000), expressionCutoff = 1, housekeepingCutoff = 1, logBase = 2, scalingFactor = 1, pseudoCount = 1, typeMatrix = TRUE))
#'
#' # Instantiate a new ScandalDataSet object
#' sds <- ScandalDataSet(assays = list(tpm = dataset), preprocConfig = global_config, nodeID = "Example1", projectID = "Project1")
#'
#' sds # Prints a user-readable summary of sds
#'
#' all(colnames(sds) == colnames(dataset)) # TRUE
#' all(rownames(sds) == rownames(dataset)) # TRUE
#' sampleIDs(sds) # Return a vector (TUMOR1, TUMOR2)
#' qualityControl(sds) # Empty list
#' nodeID(sds) # Returns "Example1"
#' projectID(sds) # Returns "Project1"
#'
#' @rdname ScandalDataSet
#'
#' @author Avishay Spitzer
#'
#' @import SingleCellExperiment
#' @import SummarizedExperiment
#' @importClassesFrom S4Vectors SimpleList
#'
#' @export
#' @exportClass ScandalDataSet
setClass("ScandalDataSet",
         slots = c(unprocessedData = "MatrixOrNULL",
                   preprocConfig = "PreprocConfig",
                   qualityControl = "SimpleList",
                   nodeID = "character",
                   projectID = "character",
                   cell2SampleMap = "function"),
         contains = "SingleCellExperiment"
)

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Constructor
###

#  Will be added by Roxygen to the class documentation

#' @usage
#' ## Constructor
#' ScandalDataSet(..., preprocConfig = DefaultPreprocConfig(),
#'   nodeID = NODE_ID(), projectID = PROJ_ID())
#'
#' @param ... arguments to pass to the \linkS4class{SingleCellExperiment} constructor.
#' @param preprocConfig a configuration object of class \linkS4class{PreprocConfig}
#' @param nodeID a unique identifier of the constructed \code{ScandalDataSet}
#' object that should represent the specific sample. If not supplied a unique ID
#' will be generated randomly however it is advised to set this field.
#' @param projectID an identifier common to all the nodes in the constructed
#' \code{ScandalDataSet} object.  If not supplied a unique ID
#' will be generated randomly however it is advised to set this field.
#' @param cell2SampleMap a **function** that maps a vector of cell IDs to a vector of
#' sample IDs to which the cells belong. The default function assumes that the cell ID
#' is a string separated by "-" and that the node ID is contained in the substring
#' until the first "-" character.
#'
#' @importClassesFrom S4Vectors DataFrame SimpleList
#' @importFrom S4Vectors SimpleList
#' @importFrom S4Vectors DataFrame
#' @importFrom methods new is as
#'
#' @export
ScandalDataSet <- function(..., preprocConfig = DefaultPreprocConfig(), nodeID = NODE_ID(), projectID = PROJ_ID(), cell2SampleMap = DEFAULT_CELL_2_NODE_MAP) {

  sce <- SingleCellExperiment(...)

  if(!is(sce, "SingleCellExperiment")) {
    sce <- as(sce, "SingleCellExperiment")
  }

  object <- new("ScandalDataSet", sce,
                unprocessedData = assay(sce),
                preprocConfig = preprocConfig,
                qualityControl = SimpleList(),
                nodeID = nodeID,
                projectID = projectID,
                cell2SampleMap = cell2SampleMap)

  int_colData(object)$Scandal <- DataFrame(row.names = colnames(object))
  int_elementMetadata(object)$Scandal <- DataFrame(row.names = rownames(object))
  int_metadata(object)$Scandal <- list()

  int_metadata(object)$Scandal[["Version"]] <- 1.0

  return (object)
}

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Validity
###

setValidity("ScandalDataSet", function(object) {

  if (length(qualityControl(object)) > 0) {

    is_qc_valid <- sapply(qualityControl(object), function(c) is(c, "QCResults"))

    if (!(base::all(is_qc_valid) == TRUE))
      return (sprintf("Every QC node must be a QCResults object"))
  }

  return (TRUE)
})

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Getters and setters.
###

#'
#' @rdname ScandalDataSet
#'
#' @export
setMethod("logtpm", "ScandalDataSet",   function(object, ...) {
  return (assay(object, i = "logtpm", ...))
})

#'
#' @param object a \code{ScandalDataSet} object.
#' @param value a value to replace the currently set value (applies to all setter methods).
#'
#' @rdname ScandalDataSet
#'
#' @export
setReplaceMethod("logtpm", c("ScandalDataSet", "ANY"),   function(object, ..., value) {
  assay(object, i = "logtpm", ...) <- value
  return (object)
})

#'
#' @rdname ScandalDataSet
#'
#' @export
setMethod("qualityControl", "ScandalDataSet", function(object) {
  return(object@qualityControl)
})

#'
#' @rdname ScandalDataSet
#'
#' @export
setReplaceMethod("qualityControl", "ScandalDataSet", function(object, value) {
  object@qualityControl <- value
  return(object)
})

#'
#' @rdname ScandalDataSet
#'
#' @export
setMethod("nodeID", "ScandalDataSet", function(object) {
  return(object@nodeID)
})

#'
#' @rdname ScandalDataSet
#'
#' @export
setReplaceMethod("nodeID", "ScandalDataSet", function(object, value) {
  object@nodeID <- value
  return(object)
})

#'
#' @rdname ScandalDataSet
#'
#' @export
setMethod("projectID", "ScandalDataSet", function(object) {
  return(object@projectID)
})

#'
#' @rdname ScandalDataSet
#'
#' @export
setMethod("sampleIDs", "ScandalDataSet", function(object) {
  return(unique(cell2SampleMap(object)(colnames(object))))
})

#'
#' @rdname ScandalDataSet
#'
#' @export
setMethod("unprocessedData", "ScandalDataSet", function(object) {
  return (object@unprocessedData)
})

#'
#' @rdname ScandalDataSet
#'
#' @export
setMethod("preprocConfig", "ScandalDataSet", function(object) {
  return(object@preprocConfig)
})

##' @export
#setReplaceMethod("preprocConfig", "ScandalDataSet", function(x, value) {
#  x@preprocConfig <- value
#  return(x)
#})

#'
#' @rdname ScandalDataSet
#'
#' @export
setMethod("cell2SampleMap", "ScandalDataSet", function(object) {
  return (object@cell2SampleMap)
})

#'
#' @rdname ScandalDataSet
#'
#' @importClassesFrom S4Vectors DataFrame SimpleList
#' @importFrom S4Vectors SimpleList
#' @importFrom S4Vectors DataFrame
#'
#' @export
setMethod("inspectSamples", "ScandalDataSet", function(object, sampleIDs, nodeID = NODE_ID()) {

  stopifnot(!is.null(sampleIDs), is.character(sampleIDs), base::all(sampleIDs %in% sampleIDs(object)) == TRUE)

  qc_list <- qualityControl(object)[sampleIDs]

  stopifnot(!is.null(qc_list), base::all(sampleIDs %in% names(qc_list)) == TRUE)

  res <- object[unique(unname(unlist(sapply(qc_list, function(qc) geneIDs(qc))))),
                unname(unlist(sapply(qc_list, function(qc) cellIDs(qc))))]

  reducedDims(res) <- SimpleList()
  res@nodeID <- nodeID
  res@preprocConfig <- preprocConfig(object)[sampleIDs]
  res@qualityControl <- qc_list
  names(res@qualityControl) <- sampleIDs
  res@unprocessedData <- res@unprocessedData[, .subset_cells(colnames(res@unprocessedData), sampleIDs, cell2SampleMap(object))]

  return(res)
})

#'
#' @rdname ScandalDataSet
#'
#' @importFrom tibble as_tibble
#' @export
setMethod("colTbl", "ScandalDataSet", function(object, ...) {

  return (as_tibble(colData(object), rownames = "CellID"))
})

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Display
###

scat <- function(fmt, vals=character(), exdent=2, ...) {
  vals <- ifelse(nzchar(vals), vals, "''")
  lbls <- paste(S4Vectors:::selectSome(vals), collapse=" ")
  txt <- sprintf(fmt, length(vals), lbls)
  cat(strwrap(txt, exdent=exdent, ...), sep="\n")
}

#'
#' @rdname ScandalDataSet
#'
#' @importFrom methods callNextMethod
#'
#' @export
setMethod("show", "ScandalDataSet", function(object) {
  callNextMethod()
  scat("sampleIDs(%d): %s\n", sampleIDs(object))
  cat("nodeID:", nodeID(object), "\n")
  cat("projectID:", projectID(object), "\n")
  scat("qualityControl(%d): %s\n", names(qualityControl(object)))
  cat("unprocessedData:", class(unprocessedData(object)), "with", NROW(unprocessedData(object)), "rows and", NCOL(unprocessedData(object)), "columns\n")
  show(preprocConfig(object))
})

#' @importFrom methods is
is_scandal_object <- function(object) { return (!is.null(object) & is(object, "ScandalDataSet")) }

#' @importFrom methods is
is_valid_assay <- function(x) { return (!(is.null(x)) & (is(x, "Matrix") | is.matrix(x))) }

# Generates a random experiment ID by sampling a random integer
NODE_ID <- function() { paste0("NODE", base::sample(1:1e9, 1, replace = FALSE)) }
PROJ_ID <- function() { paste0("PROJ", base::sample(1:1e9, 1, replace = FALSE)) }

DEFAULT_CELL_2_NODE_MAP <- function(cell_ids) {

  stopifnot(!is.null(cell_ids), is.vector(cell_ids), is.character(cell_ids))

  return (base::gsub("-.*", "", cell_ids))
}

### -------------------------------------------------------------------------
### ScandalDataSet objects (end)
### =========================================================================
###

### =========================================================================
### ScandalMetaprograms objects (start)
### -------------------------------------------------------------------------
###

#'
#' @title ScandalMetaprograms class
#'
#' @description An S4 class for storing single-cell seqeuncing data, reduced
#' dimensions representations of the data reuqired in the analysis process such as
#' t-SNE and UMAP coordinates and the end-product of the analysis which are the
#' transcriptional programs.
#'
#' @details The S4 class \code{ScandalDataSet} inherits from and extends Bioconductor's
#' base class for single-cell related applications, the \linkS4class{SingleCellExperiment}
#' class.
#' The idea behind \link{scandal} is that in order to detect intra-tumor heterogeneity
#' one needs inspect each tumor individually to collect the different transcriptomic
#' programs that can be found within each tumor and then assess these programs at the
#' level of the entire dataset to define the programs that generalize best
#' (meta-programs).
#' Besides the functionality supplied by its superclasses, \code{ScandalDataSet}
#' supplies methods to keep
#'
#' @slot wsPrograms
#' @slot wsScores
#' @slot l2R
#' @slot corL2R
#' @slot consensusClusters
#' @slot mpL2R
#' @slot metaPrograms
#' @slot mpScores
#' @slot mpAssigned
#' @slot mpMap
#' @slot scoringStrategy
#' @slot scoreThreshold
#' @slot nodeID
#' @slot projectID
#'
#' @section Constructor:
#' Constructs a \code{ScandalMetaprograms} object.
#'
#' @section Methods:
#' \describe{
#'   \item{\code{wsPrograms}}{Getter for the wsPrograms list}
#'   \item{\code{wsScores}}{Getter for the wsScores list}
#'   \item{\code{l2R}}{Getter for the l2R matrix}
#'   \item{\code{corL2R}}{Getter for the corL2R matrix}
#'   \item{\code{consensusClusters}}{Getter for the consensusClusters}
#'   \item{\code{mpL2R}}{Getter for the mpL2R list}
#'   \item{\code{metaPrograms}}{Getter for the metaPrograms list}
#'   \item{\code{mpScores}}{Getter for the mpScores matrix}
#'   \item{\code{mpAssigned}}{Getter for the mpAssigned vector}
#'   \item{\code{mpMap}}{Getter/setter for the mpMap list}
#'   \item{\code{scoringStrategy}}{Getter for the scoringStrategy}
#'   \item{\code{scoreThreshold}}{Getter for the scoreThreshold}
#'   \item{\code{nodeID}}{Getter for the nodeID}
#'   \item{\code{projectID}}{Getter for the projectID}
#' }
#'
#' @rdname ScandalMetaprograms
#'
#' @author Avishay Spitzer
#'
#' @importFrom S4Vectors DataFrame
#'
#' @export
#' @exportClass ScandalMetaprograms
setClass("ScandalMetaprograms",
         slots = c(wsPrograms = "list", # Within-sample programs
                   wsScores = "list", # Scores for the within-sample programs
                   l2R = "matrix", # Log2-ratio matrix, genes in rows, each column represents a program that came from a specific sample
                   corL2R = "matrix", # Pairwise correlation matrix computed on the L2R matrix
                   consensusClusters = "list", # List with the consensus clustering data
                   mpL2R = "list", # List of L2R vectors for each meta program
                   metaPrograms = "list", # List of vectors of gene symbols
                   mpScores = "matrix", # Matrix of scores (rows - cell IDs, columns - meta programs)
                   mpAssigned = "character", # Vector of MP assignment for each cell
                   mpMap = "character", # Named vector that maps each MP to a name representing the MP (e.g. Cell Cycle, AC etc.)
                   scoringStrategy = "character",
                   scoreThreshold = "numeric",
                   nodeID = "character",
                   projectID = "character"),
         contains = "DataFrame")

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Constructor
###

#  Will be added by Roxygen to the class documentation

#' @usage
#' ## Constructor
#' ScandalMetaprograms(...)
#'
#' @param ... arguments to pass to the \linkS4class{DataFrame} constructor.
#' @param wsPrograms
#' @param wsScores
#' @param l2R
#' @param corL2R
#' @param consensusClusters
#' @param mpL2R
#' @param metaPrograms
#' @param mpScores
#' @param mpAssigned
#' @param mpMap
#' @param scoringStrategy
#' @param scoreThreshold
#' @param nodeID a unique identifier of the constructed \code{ScandalDataSet}
#' object that should represent the specific sample. If not supplied a unique ID
#' will be generated randomly however it is advised to set this field.
#' @param projectID an identifier common to all the nodes in the constructed
#' \code{ScandalDataSet} object.  If not supplied a unique ID
#' will be generated randomly however it is advised to set this field.
#'
#' @importClassesFrom S4Vectors DataFrame
#' @importFrom S4Vectors DataFrame
#' @importFrom methods new is as
#'
#' @export
ScandalMetaprograms <- function(..., wsPrograms, wsScores, l2R, corL2R, consensusClusters, mpL2R, metaPrograms, mpScores, mpAssigned, mpMap, scoringStrategy, scoreThreshold, nodeID, projectID) {

  if (is.null(mpMap))
    mpMap <- c(stats::setNames(object = names(metaPrograms), nm = names(metaPrograms)), c("NA" = "NA"))

  stopifnot(!is.null(mpMap))
  stopifnot(is.character(mpMap))
  stopifnot(all(names(metaPrograms) %in% names(mpMap)) == TRUE)

  df <- as.data.frame(lapply(1:length(metaPrograms), function(x) {
    mp <- mpMap[paste0("MP", x)]
    setNames(list(metaPrograms[[x]],
                  mpL2R[[x]][metaPrograms[[x]]]),
             c(mp, paste0(mp, ".L2R")))
  }))

  df <- DataFrame(..., df, row.names = NULL)

  object <- new("ScandalMetaprograms", df,
                wsPrograms = wsPrograms,
                wsScores = wsScores,
                l2R = l2R,
                corL2R = corL2R,
                consensusClusters = consensusClusters,
                mpL2R = mpL2R,
                metaPrograms = metaPrograms,
                mpScores = mpScores,
                mpAssigned = mpAssigned,
                mpMap = mpMap,
                scoringStrategy = scoringStrategy,
                scoreThreshold = scoreThreshold,
                nodeID = nodeID,
                projectID = projectID)

  return (object)
}

### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
### Validity
###

setValidity("ScandalMetaprograms", function(object) {

  if (all(object@mpAssigned %in% names(object@mpMap)) == FALSE)
    return (sprintf("The names of mpMap element should contain all assigned MPs"))

  if (all(c("ccp", "best_k") %in% names(object@consensusClusters)) == FALSE)
    return (sprintf("consensusClusters must contain at least two element named ccp and best_k"))

  return (TRUE)
})

#'
#' @rdname ScandalMetaprograms
#'
#' @export
setMethod("wsPrograms", "ScandalMetaprograms",   function(object, ..., as_tibble = FALSE) {

  if (isTRUE(as_tibble))
    res <- tibble::as_tibble(unlist(object@wsPrograms, recursive = FALSE))
  else
    res <- object@wsPrograms

  return (res)
})
#'
#' @rdname ScandalMetaprograms
#'
#' @export
setMethod("wsScores", "ScandalMetaprograms",   function(object, ...) {

  return (object@wsScores)
})

#'
#' @rdname ScandalMetaprograms
#'
#' @export
setMethod("l2R", "ScandalMetaprograms",   function(object, ..., as_tibble = FALSE) {

  if (isTRUE(as_tibble))
    res <- tibble::as_tibble(object@l2R, rownames = "Gene")
  else
    res <- object@l2R

  return (res)
})

#'
#' @rdname ScandalMetaprograms
#'
#' @export
setMethod("corL2R", "ScandalMetaprograms",   function(object, ...) {

  return (object@corL2R)
})

#'
#' @rdname ScandalMetaprograms
#'
#' @export
setMethod("consensusClusters", "ScandalMetaprograms",   function(object, ..., best = FALSE) {

  if (isFALSE(best))
    res <- object@consensusClusters
  else
    res <- object@consensusClusters$ccp[[object@consensusClusters$best_k]]

  return (res)
})

#'
#' @rdname ScandalMetaprograms
#'
#' @export
setMethod("mpL2R", "ScandalMetaprograms",   function(object, ..., as_tibble = FALSE) {

  if (isTRUE(as_tibble))
    res <- tibble::as_tibble(as.data.frame(object@mpL2R), rownames = "Gene")
  else
    res <- object@mpL2R

  return (res)
})

#'
#' @rdname ScandalMetaprograms
#'
#' @export
setMethod("metaPrograms", "ScandalMetaprograms",   function(object, ..., as_tibble = FALSE) {

  if (isTRUE(as_tibble))
    res <- tibble::as_tibble(object@metaPrograms)
  else
    res <- object@metaPrograms

  return (res)
})

#'
#' @rdname ScandalMetaprograms
#'
#' @export
setMethod("mpScores", "ScandalMetaprograms",   function(object, ..., as_tibble = FALSE) {

  if (isTRUE(as_tibble))
    res <- tibble::as_tibble(object@mpScores, rownames = "CellID")
  else
    res <- object@mpScores

  return (res)
})

#'
#' @rdname ScandalMetaprograms
#'
#' @export
setMethod("mpAssigned", "ScandalMetaprograms",   function(object, ..., return_mapped = TRUE) {

  if (isTRUE(return_mapped))
    res <- stats::setNames(object = object@mpMap[object@mpAssigned], nm = names(object@mpAssigned))
  else
    res <- object@mpAssigned

  return (res)
})

#'
#' @rdname ScandalMetaprograms
#'
#' @export
setMethod("mpMap", "ScandalMetaprograms",   function(object, ...) {
  return (object@mpMap)
})

#'
#' @param object a \code{ScandalMetaprograms} object.
#' @param value a value to replace the currently set value (applies to all setter methods).
#'
#' @rdname ScandalMetaprograms
#'
#' @export
setReplaceMethod("mpMap", c("ScandalMetaprograms", "ANY"),   function(object, ..., value) {
  stopifnot(!is.null(value))
  stopifnot(is.character(value))
  stopifnot(all(names(object@metaPrograms) %in% names(value)) == TRUE)

  object@mpMap <- value

  colnames(object) <- unlist(lapply(strsplit(colnames(object), split = "\\."), function(x) ifelse(length(x) == 1, value[x[[1]]], paste0(value[x[[1]]], ".", x[[2]]))), recursive = FALSE)

  return (object)
})

#'
#' @rdname ScandalMetaprograms
#'
#' @export
setMethod("scoringStrategy", "ScandalMetaprograms",   function(object, ...) {
  return (object@scoringStrategy)
})

#'
#' @rdname ScandalMetaprograms
#'
#' @export
setMethod("scoreThreshold", "ScandalMetaprograms",   function(object, ...) {
  return (object@scoreThreshold)
})

#'
#' @rdname ScandalMetaprograms
#'
#' @export
setMethod("nodeID", "ScandalMetaprograms", function(object) {
  return(object@nodeID)
})

#'
#' @rdname ScandalMetaprograms
#'
#' @export
setMethod("projectID", "ScandalMetaprograms", function(object) {
  return(object@projectID)
})

### -------------------------------------------------------------------------
### ScandalMetaprograms objects (end)
### =========================================================================
###
dravishays/scandal documentation built on Jan. 8, 2020, 1:30 p.m.