monocle3: Clustering, Differential Expression, and Trajectory Analysis for Single-Cell RNA-Seq

Documented in clear_cds_slots combine_cds detect_genes estimate_size_factors get_citations mc_es_apply normalized_counts

# Test whether a matrix is one of our supported in-memory sparse matrices
is_sparse_matrix <- function(x){
  any(class(x) %in% c("dgCMatrix", "dgTMatrix", "lgCMatrix", "CsparseMatrix"))
}


# Test whether an object is a matrix.
is_matrix <- function(x) {
  return(methods::is(x, 'matrix') || is_sparse_matrix(x) || methods::is(x, 'IterableMatrix'))
}

# Test whether 
#' Function to calculate size factors for single-cell RNA-seq data
#'
#' @param cds The cell_data_set
#' @param round_exprs A logic flag to determine whether or not the expression
#'   value should be rounded
#' @param method A string to specify the size factor calculation approach.
#'   Options are "mean-geometric-mean-total" (default),
#'   "mean-geometric-mean-log-total".
#'
#' @return Updated cell_data_set object with a new colData column called
#'   'Size_Factor'.
#'
#' @examples
#'   \donttest{
#'     cds <- load_a549()
#'     colData(cds)[['Size_Factor']] <- NULL
#'     cds <- estimate_size_factors(cds)
#'   }
#'
#' @export
estimate_size_factors <- function(cds,
                                  round_exprs=TRUE,
                                  method=c("mean-geometric-mean-total",
                                           'mean-geometric-mean-log-total'))
{
  method <- match.arg(method)
  if(methods::is(SingleCellExperiment::counts(cds), 'IterableMatrix')) {
    if(any(BPCells::colSums(SingleCellExperiment::counts(cds)) == 0)) {
      warning("Your CDS object contains cells with zero reads. ",
                    "This causes size factor calculation to fail. Please remove ",
                    "the zero read cells using ",
                    "cds <- cds[,Matrix::colSums(counts(cds)) != 0] and then ",
                    "run cds <- estimate_size_factors(cds)")
      return(cds)
    }
  }
  else {
    if(any(Matrix::colSums(SingleCellExperiment::counts(cds)) == 0)) {
      warning("Your CDS object contains cells with zero reads. ",
                    "This causes size factor calculation to fail. Please remove ",
                    "the zero read cells using ",
                    "cds <- cds[,Matrix::colSums(counts(cds)) != 0] and then ",
                    "run cds <- estimate_size_factors(cds)")
      return(cds)
    }
  }

  if (is_sparse_matrix(SingleCellExperiment::counts(cds))){
    size_factors(cds) <- estimate_sf_sparse(SingleCellExperiment::counts(cds),
                                            round_exprs=round_exprs,
                                            method=method)
  } else if(methods::is(SingleCellExperiment::counts(cds), 'IterableMatrix')) {
    size_factors(cds) <- estimate_sf_bpcells(SingleCellExperiment::counts(cds),
                                           round_exprs=round_exprs,
                                           method=method)
  }else{
    size_factors(cds) <- estimate_sf_dense(SingleCellExperiment::counts(cds),
                                           round_exprs=round_exprs,
                                           method=method)
  }
  return(cds)
}

# Estimate size factors for each column, given a sparseMatrix from the Matrix
# package
estimate_sf_sparse <- function(counts,
                               round_exprs=TRUE,
                               method="mean-geometric-mean-total"){
  if (round_exprs)
    counts <- round(counts)

  if(method == 'mean-geometric-mean-total') {
    cell_total <- Matrix::colSums(counts)
    sfs <- cell_total / exp(mean(log(cell_total)))
  }else if(method == 'mean-geometric-mean-log-total') {
    cell_total <- Matrix::colSums(counts)
    sfs <- log(cell_total) / exp(mean(log(log(cell_total))))
  }

  sfs[is.na(sfs)] <- 1
  sfs
}

# Estimate size factors for each column, given an IterableMatrix (or
# derived class) from the BPCells package.
estimate_sf_bpcells <- function(counts,
                               round_exprs=TRUE,
                               method="mean-geometric-mean-total"){
  if (round_exprs)
    counts <- round(counts)

  if(method == 'mean-geometric-mean-total') {
    cell_total <- BPCells::colSums(counts)
    sfs <- cell_total / exp(mean(log(cell_total)))
  }else if(method == 'mean-geometric-mean-log-total') {
    cell_total <- BPCells::colSums(counts)
    sfs <- log(cell_total) / exp(mean(log(log(cell_total))))
  }

  sfs[is.na(sfs)] <- 1
  sfs
}

# Estimate size factors for each column, given a matrix
estimate_sf_dense <- function(counts,
                              round_exprs=TRUE,
                              method="mean-geometric-mean-total"){

  CM <- counts
  if (round_exprs)
    CM <- round(CM)
  if(method == "mean-geometric-mean-log-total") {
    cell_total <- apply(CM, 2, sum)
    sfs <- log(cell_total) / exp(mean(log(log(cell_total))))
  } else if(method == 'mean-geometric-mean-total') {
    cell_total <- apply(CM, 2, sum)
    sfs <- cell_total / exp(mean(log(cell_total)))
  }

  sfs[is.na(sfs)] <- 1
  sfs
}

sparse_apply <- function(Sp_X, MARGIN, FUN, convert_to_dense, ...){
  if (convert_to_dense){
    if (MARGIN == 1){
      Sp_X <- Matrix::t(Sp_X)
      res <- lapply(colnames(Sp_X), function(i, FUN, ...) {
        FUN(as.matrix(Sp_X[,i]), ...)
      }, FUN, ...)
    }else{
      res <- lapply(colnames(Sp_X), function(i, FUN, ...) {
        FUN(as.matrix(Sp_X[,i]), ...)
      }, FUN, ...)
    }
  }else{
    if (MARGIN == 1){
      Sp_X <- Matrix::t(Sp_X)
      res <- lapply(colnames(Sp_X), function(i, FUN, ...) {
        FUN(Sp_X[,i], ...)
      }, FUN, ...)
    }else{
      res <- lapply(colnames(Sp_X), function(i, FUN, ...) {
        FUN(Sp_X[,i], ...)
      }, FUN, ...)
    }
  }

  return(res)

}

# 20230424 bge: my test suggests that I can subset a BPCells matrix using
#               the subset operator with the form x[i, , drop = FALSE]
#' @noRd
split_rows <- function (x, ncl) {
  lapply(parallel::splitIndices(nrow(x), ncl),
         function(i) x[i, , drop = FALSE])
}

# 20230424 bge: my test suggests that I can subset a BPCells matrix using
#               the subset operator with the form x[i, , drop = FALSE]
#' @noRd
split_cols <- function (x, ncl) {
  lapply(parallel::splitIndices(ncol(x), ncl),
         function(i) x[, i, drop = FALSE])
}

#' @noRd
sparse_par_r_apply <- function (cl, x, FUN, convert_to_dense, ...) {
  par_res <- do.call(c, parallel::clusterApply(cl = cl,
                                               x = split_rows(x,
                                                              length(cl)),
                                               fun = sparse_apply, MARGIN = 1L,
                                               FUN = FUN,
                                               convert_to_dense=convert_to_dense, ...),
                     quote = TRUE)
  names(par_res) <- row.names(x)
  par_res
}

#' @noRd
sparse_par_c_apply <- function (cl = NULL, x, FUN, convert_to_dense, ...) {
  par_res <- do.call(c, parallel::clusterApply(cl = cl,
                                               x = split_cols(x,
                                                              length(cl)),
                                               fun = sparse_apply, MARGIN = 2L,
                                               FUN = FUN,
                                               convert_to_dense=convert_to_dense, ...),
                     quote = TRUE)
  names(par_res) <- colnames(x)
  par_res
}


#' Multicore apply-like function for cell_data_set
#'
#' mc_es_apply computes the row-wise or column-wise results of FUN, just like
#' esApply. Variables in colData from cds are available in FUN.
#'
#' @param cds A cell_data_set object.
#' @param MARGIN The margin to apply to, either 1 for rows (samples) or 2 for
#'   columns (features).
#' @param FUN Any function.
#' @param required_packages A list of packages FUN will need. Failing to
#'   provide packages needed by FUN will generate errors in worker threads.
#' @param convert_to_dense Whether to force conversion of a sparse matrix to a
#'   dense one before calling FUN.
#' @param reduction_method character, the method used to reduce dimension.
#'   Default "UMAP".
#' @param ... Additional parameters for FUN.
#' @param cores The number of cores to use for evaluation.
#'
#' @importFrom Biobase multiassign
#' @return The result of with(colData(cds) apply(counts(cds)), MARGIN, FUN, ...))
mc_es_apply <- function(cds, MARGIN, FUN, required_packages, cores=1,
                        convert_to_dense=TRUE,
                        reduction_method="UMAP", ...) {
# message('mc_es_apply: start')
  parent <- environment(FUN)
  if (is.null(parent))
    parent <- emptyenv()
  e1 <- new.env(parent=parent)
  coldata_df = as.data.frame(colData(cds))
  tryCatch({
    coldata_df$cluster = clusters(cds, reduction_method)[colnames(cds)]
    coldata_df$partition = partitions(cds, reduction_method)[colnames(cds)]
  }, error = function(e) {} )

  tryCatch({
    coldata_df$pseudotime = pseudotime(cds)
  }, error = function(e) {} )

  Biobase::multiassign(names(as.data.frame(coldata_df)),
                       as.data.frame(coldata_df), envir=e1)
  environment(FUN) <- e1


  platform <- Sys.info()[['sysname']]

  # Temporarily disable OpenMP threading in functions to be run in parallel
  old_omp_num_threads = as.numeric(Sys.getenv("OMP_NUM_THREADS"))
  if (is.na(old_omp_num_threads)){
    old_omp_num_threads = 1
  }
  RhpcBLASctl::omp_set_num_threads(1)

  # Temporarily set the number of threads the BLAS library can use to be 1
  old_blas_num_threads = as.numeric(Sys.getenv("OPENBLAS_NUM_THREADS"))
  if (is.na(old_omp_num_threads)){
    old_blas_num_threads = 1
  }
  RhpcBLASctl::blas_set_num_threads(1)

  # Note: use outfile argument to makeCluster for debugging
  if (platform == "Windows")
    cl <- parallel::makeCluster(cores)
  if (platform %in% c("Linux", "Darwin"))
    cl <- parallel::makeCluster(cores, type="FORK")

  cleanup <- function(){
    parallel::stopCluster(cl)
    RhpcBLASctl::omp_set_num_threads(old_omp_num_threads)
    RhpcBLASctl::blas_set_num_threads(old_blas_num_threads)
  }
  on.exit(cleanup)

  if (is.null(required_packages) == FALSE){
    parallel::clusterCall(cl, function(pkgs) {
      options(conflicts.policy =
                list(error = FALSE,
                     warn = FALSE,
                     generics.ok = TRUE,
                     can.mask = c("base", "methods", "utils",
                                  "grDevices", "graphics",
                                  "stats"),
                     depends.ok = TRUE))
      for (req in pkgs) {
        suppressMessages(library(req, character.only=TRUE, warn.conflicts=FALSE, quietly=TRUE,
                verbose=FALSE))
      }
    }, required_packages)
  }

  #
  # 20230424 bge: The following sparse_par_?_apply calls using counts(cds) appear to work based on my simple test.
  #
  if (MARGIN == 1){
# message('mc_es_apply: MARGIN 1')
    if( methods::is(counts(cds), 'IterableMatrix')) {
# message('mc_es_apply: BPCells matrix')
      suppressWarnings(res <- sparse_par_r_apply(cl=cl, x=monocle3::counts_row_order(cds), FUN=FUN,
                                                 convert_to_dense=convert_to_dense, ...))
    }
    else {
# message('mc_es_apply: dgCMatrix matrix')
      suppressWarnings(res <- sparse_par_r_apply(cl=cl, x=SingleCellExperiment::counts(cds), FUN=FUN,
                                                 convert_to_dense=convert_to_dense, ...))
    }
  }
  else {
    suppressWarnings(res <- sparse_par_c_apply(cl=cl, x=SingleCellExperiment::counts(cds), FUN=FUN,
                                               convert_to_dense=convert_to_dense, ...))
  }

  res
}

#' @importFrom Biobase multiassign
smart_es_apply <- function(cds, MARGIN, FUN, convert_to_dense,
                           reduction_method="UMAP", ...) {
# message('smart_es_apply: start')
  parent <- environment(FUN)
  if (is.null(parent))
    parent <- emptyenv()
  e1 <- new.env(parent=parent)
  coldata_df = as.data.frame(colData(cds))
  tryCatch({
    coldata_df$cluster = clusters(cds, reduction_method)[colnames(cds)]
    coldata_df$partition = partitions(cds, reduction_method)[colnames(cds)]
    coldata_df$pseudotime = pseudotime(cds)
  }, error = function(e) {} )
  Biobase::multiassign(names(as.data.frame(coldata_df)),
                       as.data.frame(coldata_df), envir=e1)
  environment(FUN) <- e1

  if (methods::is(SingleCellExperiment::counts(cds), 'IterableMatrix')) {
# message('smart_es_apply: BPCells matrix')
    if(MARGIN == 1) {
# message('smart_es_apply: MARGIN 1')
      res <- sparse_apply(monocle3::counts_row_order(cds), MARGIN, FUN, convert_to_dense, ...)
    }
    else {
# message('smart_es_apply: MARGIN 2')
      res <- sparse_apply(SingleCellExperiment::counts(cds), MARGIN, FUN, convert_to_dense, ...)
    }
  }
  else
  if (is_sparse_matrix(SingleCellExperiment::counts(cds))) {
# message('smart_es_apply: dgCMatrix matrix')
    res <- sparse_apply(SingleCellExperiment::counts(cds), MARGIN, FUN, convert_to_dense, ...)
  }
  else {
# message('smart_es_apply: dense matrix')
    res <- pbapply::pbapply(SingleCellExperiment::counts(cds), MARGIN, FUN, ...)
  }

  if (MARGIN == 1)
  {
    names(res) <- row.names(cds)
  }else{
    names(res) <- colnames(cds)
  }

  res
}


#' Detects genes above minimum threshold.
#'
#' @description For each gene in a cell_data_set object, detect_genes counts
#' how many cells are expressed above a minimum threshold. In addition, for
#' each cell, detect_genes counts the number of genes above this threshold that
#' are detectable. Results are added as columns num_cells_expressed and
#' num_genes_expressed in the rowData and colData tables respectively.
#'
#' @param cds Input cell_data_set object.
#' @param min_expr Numeric indicating expression threshold
#' @return Updated cell_data_set object
#' @export
#' @examples
#' \dontrun{
#'    cds <- detect_genes(cds, min_expr=0.1)
#' }
detect_genes <- function(cds, min_expr=0){
  assertthat::assert_that(methods::is(cds, "cell_data_set"))
    assertthat::assert_that(is.numeric(min_expr))

  mat_bin <- counts(cds) > min_expr
  rowData(cds)$num_cells_expressed <- Matrix::rowSums(mat_bin > min_expr)
  colData(cds)$num_genes_expressed <- Matrix::colSums(mat_bin > min_expr)

  cds
}

#' Return a size-factor normalized and (optionally) log-transformed expression
#' matrix
#'
#' @param cds A CDS object to calculate normalized expression matrix from.
#' @param norm_method String indicating the normalization method. Options are
#'   "log" (Default), "binary" and "size_only".
#' @param pseudocount A pseudocount to add before log transformation. Ignored
#'   if norm_method is not "log". Default is 1.
#' @return Size-factor normalized, and optionally log-transformed, expression
#'   matrix.
#'
#' @examples
#'   \donttest{
#'     cds <- load_a549()
#'     normalized_matrix <- normalized_counts(cds)
#'   }
#'
#' @export
normalized_counts <- function(cds,
                              norm_method=c("log", "binary", "size_only"),
                              pseudocount=1) {
  norm_method <- match.arg(norm_method)

  norm_mat <- SingleCellExperiment::counts(cds)

  if (norm_method == "binary"){
    if(methods::is(norm_mat, 'IterableMatrix')) {
      norm_mat <- BPCells::binarize(norm_mat, threshold=0, strict_inequality=TRUE)
    }
    else {
      # The '+ 0' coerces the matrix to type numeric. It's possible
      # to use 'as.numeric(norm_mat > 0)' but the matrix
      # attributes disappear...
      norm_mat <- (norm_mat > 0) + 0
      if (is_sparse_matrix(norm_mat)) {
        norm_mat = methods::as(norm_mat, "dgCMatrix")
      }
    }
  }
  else {
    assertthat::assert_that(!is.null(size_factors(cds)))
    if(methods::is(norm_mat, 'IterableMatrix')) {
      if(norm_method == 'log' && pseudocount != 1) {
        stop('normalized_counts: pseudocount must be 1 for sparse expression matrices and norm_method log')
      }
      norm_mat <- BPCells::t(BPCells::t(norm_mat) / size_factors(cds))
      if(norm_method == 'log' && pseudocount == 1) {
        norm_mat <- log1p(norm_mat) / log(10)
      }
    }
    else
    if (is_sparse_matrix(norm_mat)){
      norm_mat <- norm_mat
      norm_mat@x = norm_mat@x / rep.int(size_factors(cds), diff(norm_mat@p))
      if (norm_method == "log"){
        if (pseudocount == 1){
          norm_mat@x = log10(norm_mat@x + pseudocount)
        }else{
          stop("Pseudocount must equal 1 with sparse expression matrices")
        }
      }
    }else{
      norm_mat = Matrix::t(Matrix::t(norm_mat) / size_factors(cds))
      if (norm_method == "log"){
#          norm_mat@x <- log10(norm_mat + pseudocount)
          norm_mat <- log10(norm_mat + pseudocount)
      }
    }
  }

  return(norm_mat)
}


#
#  If length(matrix_control) == 0 and none of the cdses in cds_list
#  are BPCells class, return a matrix_control list for a dgCMatrix
#  matrix; otherwise, return a matrix_control list for a BPCells
#  matrix.
#  If length(matrix_control) > 0, return a matrix_control list with
#  the class given in the matrix_control parameter.
#
#  Notes:
#    o  we cannot use set_matrix_control_default() in combine_cds because
#       cds_list has more than one cds to test.
#    o  matrix_control[['matrix_class']] must be set in a
#       matrix_control list.
#
set_matrix_control_combine_cds <- function(cds_list=list(), matrix_control=list(), verbose=FALSE) {
  if(length(matrix_control) > 0) {
    assertthat::assert_that(!is.null(matrix_control[['matrix_class']]),
                            msg = paste0('set_matrix_control_combine_cds: matrix_control[[\'matrix_class\']] missing in matrix_control list.'))

    tryCatch(check_matrix_control(matrix_control=matrix_control, control_type='unrestricted', check_conditional=FALSE),
             error = function(c) {stop(paste0(trimws(c), '\n* error in combine_cds')) })
             
    if(matrix_control[['matrix_class']] == 'BPCells') {
      bpcells_matrix_flag <- TRUE
    }
    else {
      bpcells_matrix_flag <- FALSE
    }
  }
  else {
    bpcells_matrix_flag <- FALSE
    # Are any of the count matrices BPCells class?
    for(i in seq(1, length(cds_list), 1)) {
      if(methods::is(counts(cds_list[[i]]), 'IterableMatrix')) {
        bpcells_matrix_flag <- TRUE
        break
      }
    }
  }

  if(bpcells_matrix_flag) {
    matrix_control_default <- get_global_variable('matrix_control_bpcells_unrestricted')
  }
  else {
    matrix_control_default <- get_global_variable('matrix_control_csparsematrix_unrestricted')
  }

  matrix_control_out <- set_matrix_control(matrix_control=matrix_control, matrix_control_default=matrix_control_default, control_type='unrestricted')

  return(matrix_control_out)
}


#' Combine a list of cell_data_set objects
#'
#' This function will combine a list of cell_data_set objects into a new
#' cell_data_set object.
#'
#' @details If any of the counts matrices is BPCells class, the combined
#'   counts matrix will be BPCells class.
#'
#' @param cds_list List of cds objects to be combined.
#' @param keep_all_genes Logical indicating what to do if there is a mismatch
#'   in the gene sets of the CDSs. If TRUE, all genes are kept and cells from
#'   CDSs missing a given gene will be filled in with zeroes. If FALSE, only
#'   the genes in common among all of the CDSs will be kept. Default is TRUE.
#' @param cell_names_unique Logical indicating whether all of the cell IDs
#'   across all of the CDSs are unique. If FALSE, the CDS name is appended to
#'   each cell ID to prevent collisions. These cell IDs are used as count matrix
#'   column names and colData(cds) row names. Cell names stored in other
#'   cds locations are not modified so you will need to modify them manually
#'   for consistency. Default is FALSE.
#' @param sample_col_name A string to be the column name for the colData column
#'   that indicates which original cds the cell derives from. Default is
#'   "sample".
#' @param keep_reduced_dims Logical indicating whether to keep the reduced
#'   dimension matrices. Do not keep the reduced dimensions unless you know
#'   that the reduced dimensions are the same in each CDS. This is true for
#'   projected data sets, for example. Default is FALSE.
#' @param matrix_control A list used to control how the counts matrix is
#'    is stored in the CDS. By default, combine_cds stores the counts
#'    matrix as an in-memory, sparse (dgCMatrix), unless (a) at least one
#'    of the cdses in cds_list uses a BPCells counts matrix, or
#'    (b) you specify matrix_control=list(matrix_class='BPCells').
#' @param verbose Whether to emit verbose output while running
#'   combine_cds.
#'   Default is FALSE.
#' @return A combined cell_data_set object.
#' @export
#'
combine_cds <- function(cds_list,
                        keep_all_genes = TRUE,
                        cell_names_unique = FALSE,
                        sample_col_name = "sample",
                        keep_reduced_dims = FALSE,
                        matrix_control = list(),
                        verbose=FALSE) {

  assertthat::assert_that(is.list(cds_list),
                          msg=paste("cds_list must be a list."))

  assertthat::assert_that(all(sapply(cds_list, class) == "cell_data_set"),
                          msg=paste("All members of cds_list must be",
                                    "cell_data_set class."))
  assertthat::assert_that(is.character(sample_col_name))

  if (sample_col_name == "sample" &
      any(sapply(cds_list, function(cds) "sample" %in% names(colData(cds))))) {
    warning("By default, the combine_cds function adds a column called ",
                   "'sample' which indicates which initial cds a cell came ",
                   "from. One or more of your input cds objects contains a ",
                   "'sample' column, which will be overwritten. We recommend ",
                   "you rename this column or provide an alternative column ",
                   "name using the 'sample_col_name' parameter.")
  }
  assertthat::assert_that(!any(sapply(cds_list, function(cds)
    sum(is.na(names(colData(cds)))) != 0)),
                          msg = paste0("One of the input CDS' has a colData ",
                                       "column name that is NA, please ",
                                       "remove or rename that column before ",
                                       "proceeding."))
  assertthat::assert_that(!any(sapply(cds_list, function(cds)
    sum(is.na(names(rowData(cds)))) != 0)),
    msg = paste0("One of the input CDS' has a rowData ",
                 "column name that is NA, please ",
                 "remove or rename that column before ",
                 "proceeding."))

  if(length(matrix_control) > 0) {
    assertthat::assert_that(is.list(matrix_control),
                            msg=paste0('combine_cds: matrix_control must be a list.'))
    assertthat::assert_that(!is.null(matrix_control[['matrix_class']]),
                            msg=paste0('combine_cds: matrix_control[[\'matrix_class\']] missing in matrix_control list.'))
  }

  num_cells <- sapply(cds_list, ncol)
  if(sum(num_cells == 0) != 0) {
    message("Some CDS' have no cells, these will be skipped.")
    cds_list <- cds_list[num_cells != 0]
  }
  if(length(cds_list) == 1) return(cds_list[[1]])

  assertthat::assert_that(is.logical(keep_all_genes))
  assertthat::assert_that(is.logical(cell_names_unique))

  list_named <- TRUE
  if(is.null(names(cds_list))) {
    list_named <- FALSE
  }

  matrix_control <- set_matrix_control_combine_cds(cds_list=cds_list, matrix_control=matrix_control)
  if(matrix_control[['matrix_class']] == 'BPCells')
    bpcells_matrix_flag <- TRUE
  else
    bpcells_matrix_flag <- FALSE

  if(verbose) {
    message('combine_cds: bpcells_matrix_flag: ', bpcells_matrix_flag)
    message('combine_cds: ')
    message(show_matrix_control(matrix_control))
   message()
  }
  
  exprs_list <- list()
  fd_list <- list()
  pd_list <- list()
  gene_list <- c()
  overlap_list <- c(row.names(fData(cds_list[[1]])))
  pdata_cols <- c()
  fdata_cols <- c()
  all_cells <- c()

  for(cds in cds_list) {
    # Make a vector of gene names either all names or
    # only names in common to all CDSes.
    gene_list <-  c(gene_list, row.names(fData(cds)))
    overlap_list <- intersect(overlap_list, row.names(fData(cds)))
    if (!keep_all_genes) {
      gene_list <- overlap_list
    }

    # Make concatenated vectors of column (header) names of the cells
    # and features, and of cell names.
    pdata_cols <- c(pdata_cols, names(pData(cds)))
    fdata_cols <- c(fdata_cols, names(fData(cds)))
    all_cells <- c(all_cells, row.names(pData(cds)))
  }

  # Remove duplicate gene names, feature and cell column
  # names, and cell names.
  gene_list <- unique(gene_list)
  if(length(overlap_list) == 0) {
    if (keep_all_genes) {
      warning("No genes are shared amongst all the CDS objects.")
    } else {
      stop("No genes are shared amongst all the CDS objects. To generate ",
                 "a combined CDS with all genes, use keep_all_genes = TRUE")
    }
  }
  pdata_cols <- unique(pdata_cols)
  fdata_cols <- unique(fdata_cols)
  if (sum(duplicated(all_cells)) != 0 & cell_names_unique) {
    stop("Cell names are not unique across CDSs - cell_names_unique ",
               "must be FALSE.")
  }
  all_cells <- unique(all_cells)

  # Give all CDSes the same set of rows (amongst other things),
  # looping through the CDSes.
  for(i in seq(1, length(cds_list), 1)) {
    pd <- as.data.frame(pData(cds_list[[i]]))

    # Counts matrix rows of genes common to the CDSes examined
    # up to this pass through the loop.
    exp <- counts(cds_list[[i]])
    if(bpcells_matrix_flag && !methods::is(exp, 'IterableMatrix')) {
      exp <- methods::as(exp, 'IterableMatrix')       # wraps dgCMatrix in IterableMatrix
    }
    exp <- exp[intersect(row.names(exp), gene_list),, drop=FALSE]

    # Make cell names distinct, if necessary, assign cell names to
    # pd, the sample names to a column in pd, and cell names to
    # the counts matrix columns.
    if (!cell_names_unique) {
      if(list_named) {
        row.names(pd) <- paste(row.names(pd), names(cds_list)[[i]], sep="_")
        pd[,sample_col_name] <- names(cds_list)[[i]]
      } else {
        row.names(pd) <- paste(row.names(pd), i, sep="_")
        pd[,sample_col_name] <- i
      }
      colnames(exp) <- row.names(pd)
    } else {
      if(list_named) {
        pd[,sample_col_name] <- names(cds_list)[[i]]
      } else {
        pd[,sample_col_name] <- i
      }
    }

    # Initialize new entries in pd to 'NA'.
    not_in <- pdata_cols[!pdata_cols %in% names(pd)]
    for (n in not_in) {
      pd[,n] <- NA
    }

    # Select feature data frame rows that are common to
    # fd and gene_list.
    fd <- as.data.frame(fData(cds_list[[i]]))
    fd <- fd[intersect(row.names(fd), gene_list),, drop=FALSE]

    # Make a vector of gene names that are that are not in
    # fd.
    not_in <- fdata_cols[!fdata_cols %in% names(fd)]
    for(col in names(fd)) {
      if(methods::is(fd[,col], "factor")) {
        fd[,col] <- as.character(fd[,col])
      }
    }
    for (n in not_in) {
      fd[,n] <- NA
    }
    not_in_g <- gene_list[!gene_list %in% row.names(fd)]

    # Make an empty matrix (and fd data frame) with the rows
    # that need to be added to the counts matrix for cds_list[[i]],
    # and append it to the accumulating counts matrix.
    if (length(not_in_g) > 0) {
      not_in_g_df <- as.data.frame(matrix(NA, nrow = length(not_in_g), ncol=ncol(fd)))
      row.names(not_in_g_df) <- not_in_g
      names(not_in_g_df) <- names(fd)
      fd <- rbind(fd, not_in_g_df)      # bge rbind

      extra_rows <- Matrix::Matrix(0, ncol=ncol(exp),
                                   sparse=TRUE,
                                   nrow=length(not_in_g))
      row.names(extra_rows) <- not_in_g
      colnames(extra_rows) <- colnames(exp)

      # Append additional rows.
      if(bpcells_matrix_flag) {
        exp <- rbind2(exp, methods::as(extra_rows, 'IterableMatrix'))       # wraps dgCMatrix in IterableMatrix
      }
      else {
        exp <- rbind(exp, extra_rows)
      }
#      exp <- exp
    }

    # Gather matrices and data frames into lists.
    exprs_list[[i]] <- exp[gene_list, , drop=FALSE]
    fd_list[[i]] <- fd[gene_list, , drop=FALSE]
    pd_list[[i]] <- pd
  }

  all_fd <- array(NA,dim(fd_list[[1]]),dimnames(fd_list[[1]]))

  for (fd in fd_list) {
    for (j in colnames(fd)) {
      col_info <- all_fd[,j]
      col_info[is.na(col_info)] <- fd[is.na(col_info),j]
      col_info[col_info != fd[,j]] <- "conf"
      all_fd[,j] <- col_info
    }
  }

  confs <- sum(all_fd == "conf", na.rm=TRUE)

  if (confs > 0) {
   warning("When combining rowData, conflicting values were found - ",
                  "conflicts will be labelled 'conf' in the combined cds ",
                  "to prevent conflicts, either change conflicting values to ",
                  "match, or rename columns from different cds' to be unique.")
  }
  #all_fd <- do.call(cbind, fd_list)
  all_fd <- all_fd[,fdata_cols, drop=FALSE]

  # Build the final, comprehensive cell data frame and counts matrix.
  all_pd <- do.call(rbind, pd_list)    # bge rbind
  if(bpcells_matrix_flag) {
    num_exprs <- length(exprs_list)
    all_exp <- exprs_list[[1]]
    for(i in seq(2, num_exprs, 1)) {
      all_exp <- cbind2(all_exp, exprs_list[[i]])
    }
  }
  else {
    all_exp <- do.call(cbind, exprs_list)
  }

  # Filter counts matrix by fd and pd names.
  all_exp <- all_exp[row.names(all_fd), row.names(all_pd), drop=FALSE]

  # Make a BPCells count matrix, if necessary.
  if(bpcells_matrix_flag) {
    all_exp <- set_matrix_class(mat=all_exp, matrix_control=matrix_control)
  }

  if(verbose) {
    message('combine_cds: ')
    message(paste0(show_matrix_info(matrix_info=get_matrix_info(mat=all_exp), indent='  ')), appendLF=FALSE)
    message()
  }

  # Make a combined CDS from all_exp, all_pd, and all_fd.
  new_cds <- new_cell_data_set(all_exp, cell_metadata = all_pd, gene_metadata = all_fd)
  new_cds <- set_matrix_citation(new_cds)

  # Add in preprocessing results.
  if(keep_reduced_dims) {
    # Find intersection of reduced dim names, for example, 'PCA', 'UMAP', 'Aligned'.
    reduced_dim_names <- names(reducedDims(cds_list[[1]]))
    for(i in seq(2, length(cds_list), 1)) {
      reduced_dim_names <- intersect(reduced_dim_names, names(reducedDims(cds_list[[i]])))
    }

#    for(red_dim in names(SingleCellExperiment::reducedDims(cds_list[[1]]))) {
    for(red_dim in reduced_dim_names) {
      reduced_dims_list <- list()
      for(j in seq(1, length(cds_list), 1)) {
        reduced_dims_list[[j]] <- SingleCellExperiment::reducedDims(cds_list[[j]])[[red_dim]]
      }
      SingleCellExperiment::reducedDims(new_cds, withDimnames=FALSE)[[red_dim]] <- do.call(rbind, reduced_dims_list, quote=FALSE)
      # The following should not happen; the accessor appears to ensure the
      # correct row order.
      if(!identical(rownames(SingleCellExperiment::reducedDims(new_cds)[[red_dim]]), rownames(all_pd))) {
        stop('Mis-ordered reduced matrix rows.')
      }
    }
  }

  # Add a BPCells row-major order matrix to assays
  # for BPCells count matrices.
  if(bpcells_matrix_flag) {
    new_cds <- set_cds_row_order_matrix(new_cds)
  }

  matrix_id <-  get_unique_id(counts(cds))
  new_cds <- initialize_counts_metadata(new_cds) 
  new_cds <- set_counts_identity(new_cds, 'combin_cds', matrix_id)

  new_cds
}


# Combine cell_data_sets that may have differing numbers of rows.
combine_cds_for_maddy <- function(cds_list,
                        keep_all_genes = TRUE,
                        cell_names_unique = FALSE,
                        sample_col_name = "sample",
                        keep_reduced_dims = FALSE,
                        matrix_control = list()) {

  assertthat::assert_that(is.list(cds_list),
                          msg=paste("cds_list must be a list."))

  assertthat::assert_that(all(sapply(cds_list, class) == "cell_data_set"),
                          msg=paste("All members of cds_list must be",
                                    "cell_data_set class."))
  assertthat::assert_that(is.character(sample_col_name))

  if (sample_col_name == "sample" &
      any(sapply(cds_list, function(cds) "sample" %in% names(colData(cds))))) {
    warning("By default, the combine_cds function adds a column called ",
                   "'sample' which indicates which initial cds a cell came ",
                   "from. One or more of your input cds objects contains a ",
                   "'sample' column, which will be overwritten. We recommend ",
                   "you rename this column or provide an alternative column ",
                   "name using the 'sample_col_name' parameter.")
  }
  assertthat::assert_that(!any(sapply(cds_list, function(cds)
    sum(is.na(names(colData(cds)))) != 0)),
                          msg = paste0("One of the input CDS' has a colData ",
                                       "column name that is NA, please ",
                                       "remove or rename that column before ",
                                       "proceeding."))
  assertthat::assert_that(!any(sapply(cds_list, function(cds)
    sum(is.na(names(rowData(cds)))) != 0)),
    msg = paste0("One of the input CDS' has a rowData ",
                 "column name that is NA, please ",
                 "remove or rename that column before ",
                 "proceeding."))

  num_cells <- sapply(cds_list, ncol)
  if(sum(num_cells == 0) != 0) {
    message("Some CDS' have no cells, these will be skipped.")
    cds_list <- cds_list[num_cells != 0]
  }
  if(length(cds_list) == 1) return(cds_list[[1]])

  assertthat::assert_that(is.logical(keep_all_genes))
  assertthat::assert_that(is.logical(cell_names_unique))

  list_named <- TRUE
  if(is.null(names(cds_list))) {
    list_named <- FALSE
  }

  if(!is.null(matrix_control[['matrix_class']]) &&
     matrix_control[['matrix_class']] == 'BPCells') {
    bpcells_matrix_flag <- TRUE
  }
  else {
    bpcells_matrix_flag <- FALSE
    # Are any of the count matrices BPCells class?
    for(i in seq(1, length(cds_list), 1)) {
      if(methods::is(counts(cds_list[[i]]), 'IterableMatrix')) {
        bpcells_matrix_flag <- TRUE
        break
      }
    }
  }

  check_matrix_control(matrix_control=matrix_control, control_type='unrestricted', check_conditional=FALSE)
  if(bpcells_matrix_flag ||
     (!is.null(matrix_control[['matrix_class']]) && matrix_control[['matrix_class']] == 'BPCells')) {
    matrix_control_default <- get_global_variable('matrix_control_bpcells_unrestricted')
  }
  else {
    matrix_control_default <- get_global_variable('matrix_control_csparsematrix_unrestricted')
  }
  matrix_control <- set_matrix_control(matrix_control=matrix_control, matrix_control_default=matrix_control_default, control_type='unrestricted')

  exprs_list <- list()
  fd_list <- list()
  pd_list <- list()
  gene_list <- c()
  overlap_list <- c(row.names(fData(cds_list[[1]])))
  pdata_cols <- c()
  fdata_cols <- c()
  all_cells <- c()

  for(cds in cds_list) {
    # Make a vector of gene names either all names or
    # only names in common to all CDSes.
    gene_list <-  c(gene_list, row.names(fData(cds)))
    overlap_list <- intersect(overlap_list, row.names(fData(cds)))
    if (!keep_all_genes) {
      gene_list <- overlap_list
    }

    # Make concatenated vectors of column (header) names of the cells
    # and features, and of cell names.
    pdata_cols <- c(pdata_cols, names(pData(cds)))
    fdata_cols <- c(fdata_cols, names(fData(cds)))
    all_cells <- c(all_cells, row.names(pData(cds)))
  }

  # Remove duplicate gene names, feature and cell column
  # names, and cell names.
  gene_list <- unique(gene_list)
  if(length(overlap_list) == 0) {
    if (keep_all_genes) {
      warning("No genes are shared amongst all the CDS objects.")
    } else {
      stop("No genes are shared amongst all the CDS objects. To generate ",
                 "a combined CDS with all genes, use keep_all_genes = TRUE")
    }
  }
  pdata_cols <- unique(pdata_cols)
  fdata_cols <- unique(fdata_cols)
  if (sum(duplicated(all_cells)) != 0 & cell_names_unique) {
    stop("Cell names are not unique across CDSs - cell_names_unique ",
               "must be FALSE.")
  }
  all_cells <- unique(all_cells)

  # Give all CDSes the same set of rows (amongst other things),
  # looping through the CDSes.
  for(i in seq(1, length(cds_list), 1)) {
    pd <- as.data.frame(pData(cds_list[[i]]))

    # Counts matrix rows of genes common to the CDSes examined
    # up to this pass through the loop.
    exp <- counts(cds_list[[i]])
    if(bpcells_matrix_flag && !methods::is(exp, 'IterableMatrix')) {
      exp <- methods::as(exp, 'IterableMatrix')                               # wraps dgCMatrix in IterableMatrix
    }
    exp <- exp[intersect(row.names(exp), gene_list),, drop=FALSE]

    # Make cell names distinct, if necessary, assign cell names to
    # pd, the sample names to a column in pd, and cell names to
    # the counts matrix columns.
    if (!cell_names_unique) {
      if(list_named) {
        row.names(pd) <- paste(row.names(pd), names(cds_list)[[i]], sep="_")
        pd[,sample_col_name] <- names(cds_list)[[i]]
      } else {
        row.names(pd) <- paste(row.names(pd), i, sep="_")
        pd[,sample_col_name] <- i
      }
      colnames(exp) <- row.names(pd)
    } else {
      if(list_named) {
        pd[,sample_col_name] <- names(cds_list)[[i]]
      } else {
        pd[,sample_col_name] <- i
      }
    }

    # Initialize new entries in pd to 'NA'.
    not_in <- pdata_cols[!pdata_cols %in% names(pd)]
    for (n in not_in) {
      pd[,n] <- NA
    }

    # Select feature data frame rows that are common to
    # fd and gene_list.
    fd <- as.data.frame(fData(cds_list[[i]]))
    fd <- fd[intersect(row.names(fd), gene_list),, drop=FALSE]

    # Make a vector of gene names that are that are not in
    # fd.
    not_in <- fdata_cols[!fdata_cols %in% names(fd)]
    for(col in names(fd)) {
      if(methods::is(fd[,col], "factor")) {
        fd[,col] <- as.character(fd[,col])
      }
    }
    for (n in not_in) {
      fd[,n] <- NA
    }
    not_in_g <- gene_list[!gene_list %in% row.names(fd)]

    # Make an empty matrix (and fd data frame) with the rows
    # that need to be added to the counts matrix for cds_list[[i]],
    # and append it to the accumulating counts matrix.
    if (length(not_in_g) > 0) {
      not_in_g_df <- as.data.frame(matrix(NA, nrow = length(not_in_g), ncol=ncol(fd)))
      row.names(not_in_g_df) <- not_in_g
      names(not_in_g_df) <- names(fd)
      fd <- rbind(fd, not_in_g_df)      # bge rbind

      extra_rows <- Matrix::Matrix(0, ncol=ncol(exp),
                                   sparse=TRUE,
                                   nrow=length(not_in_g))
      row.names(extra_rows) <- not_in_g
      colnames(extra_rows) <- colnames(exp)

      # Append additional rows.
      if(bpcells_matrix_flag) {
        exp <- rbind2(exp, methods::as(extra_rows, 'IterableMatrix'))    # wraps dgCMatrix in IterableMatrix
      }
      else {
        exp <- rbind(exp, extra_rows)
      }
#      exp <- exp
    }

    # Gather matrices and data frames into lists.
    exprs_list[[i]] <- exp[gene_list, , drop=FALSE]
    fd_list[[i]] <- fd[gene_list, , drop=FALSE]
    pd_list[[i]] <- pd
  }

  all_fd <- array(NA,dim(fd_list[[1]]),dimnames(fd_list[[1]]))

  for (fd in fd_list) {
    for (j in colnames(fd)) {
      col_info <- all_fd[,j]
      col_info[is.na(col_info)] <- fd[is.na(col_info),j]
      col_info[col_info != fd[,j]] <- "conf"
      all_fd[,j] <- col_info
    }
  }

  confs <- sum(all_fd == "conf", na.rm=TRUE)

  if (confs > 0) {
   warning("When combining rowData, conflicting values were found - ",
                  "conflicts will be labelled 'conf' in the combined cds ",
                  "to prevent conflicts, either change conflicting values to ",
                  "match, or rename columns from different cds' to be unique.")
  }
  #all_fd <- do.call(cbind, fd_list)
  all_fd <- all_fd[,fdata_cols, drop=FALSE]

  # Build the final, comprehensive cell data frame and counts matrix.
  all_pd <- do.call(rbind, pd_list)    # bge rbind
  if(bpcells_matrix_flag) {
    num_exprs <- length(exprs_list)
    all_exp <- exprs_list[[1]]
    for(i in seq(2, num_exprs, 1)) {
      all_exp <- cbind2(all_exp, exprs_list[[i]])
    }
  }
  else {
    all_exp <- do.call(cbind, exprs_list)
  }

  # Filter counts matrix by fd and pd names.
  all_exp <- all_exp[row.names(all_fd), row.names(all_pd), drop=FALSE]

  # Make a BPCells count matrix, if necessary.
  if(bpcells_matrix_flag) {
    all_exp <- set_matrix_class(mat=all_exp, matrix_control=matrix_control)
  }

  # Make a combined CDS from all_exp, all_pd, and all_fd.
  new_cds <- new_cell_data_set(all_exp, cell_metadata = all_pd, gene_metadata = all_fd)
  new_cds <- set_matrix_citation(new_cds)

  # Add in preprocessing results.
  if(keep_reduced_dims) {
    # Find intersection of reduced dim names, for example, 'PCA', 'UMAP', 'Aligned'.
    reduced_dim_names <- names(reducedDims(cds_list[[1]]))
    for(i in seq(2, length(cds_list), 1)) {
      reduced_dim_names <- intersect(reduced_dim_names, names(reducedDims(cds_list[[i]])))
    }

    for(red_dim in reduced_dim_names) {
      reduced_dims_list <- list()
      for(j in seq(1, length(cds_list), 1)) {
        reduced_dims_list[[j]] <- SingleCellExperiment::reducedDims(cds_list[[j]])[[red_dim]]
      }
      # Modify for binding reduced dim spaces with differing numbers of rows.
      reduced_dims_list <- lapply(reduced_dims_list, function(x) as.data.frame(x))
      SingleCellExperiment::reducedDims(new_cds)[[red_dim]] <- bind_rows(reduced_dims_list)
#      SingleCellExperiment::reducedDims(new_cds, withDimnames=FALSE)[[red_dim]] <- do.call(rbind, reduced_dims_list, quote=FALSE)
      # The following should not happen; the accessor appears to ensure the
      # correct row order.
      if(!identical(rownames(SingleCellExperiment::reducedDims(new_cds)[[red_dim]]), rownames(all_pd))) {
        stop('Mis-ordered reduced matrix rows.')
      }
    }
  }

  # Add a BPCells row-major order matrix to assays
  # for BPCells count matrices.
  if(bpcells_matrix_flag) {
    new_cds <- set_cds_row_order_matrix(new_cds)
  }

  matrix_id <-  get_unique_id(counts(cds))
  new_cds <- initialize_counts_metadata(new_cds) 
  new_cds <- set_counts_identity(new_cds, 'combine_cds', matrix_id)

  new_cds
}


#' Clear CDS slots
#'
#' Function to clear all CDS slots besides colData, rowData and expression data.
#'
#' @param cds cell_data_set to be cleared
#'
#' @return A cell_data_set with only expression, rowData and colData present.
#' @export
#'
clear_cds_slots <- function(cds) {
  cds@reduce_dim_aux <- S4Vectors::SimpleList()
  cds@principal_graph_aux <- S4Vectors::SimpleList()
  cds@principal_graph <- S4Vectors::SimpleList()
  cds@clusters <- S4Vectors::SimpleList()
  SingleCellExperiment::reducedDims(cds) <- S4Vectors::SimpleList()
  cds
}


add_citation <- function(cds, citation_key) {
  citation_map <- list(
    UMAP = c("UMAP", "McInnes, L., Healy, J. & Melville, J. UMAP: Uniform Manifold Approximation and Projection for dimension reduction. Preprint at https://arxiv.org/abs/1802.03426 (2018)."),
    MNN_correct = c("MNN Correct", "Haghverdi, L. et. al. Batch effects in single-cell RNA-sequencing data are corrected by matching mutual nearest neighbors. Nat. Biotechnol. 36, 421-427 (2018). https://doi.org/10.1038/nbt.4091"),
    partitions = c("partitioning", c("Levine, J. H., et. al. Data-driven phenotypic dissection of AML reveals progenitor-like cells that correlate with prognosis. Cell 162, 184-197 (2015). https://doi.org/10.1016/j.cell.2015.05.047",
                                  "Wolf, F. A. et. al. PAGA: graph abstraction reconciles clustering with trajectory inference through a topology preserving map of single cells. Genome Biol. 20, 59 (2019). https://doi.org/10.1186/s13059-019-1663-x")),
    clusters = c("clustering", "Levine, J. H. et. al. Data-driven phenotypic dissection of AML reveals progenitor-like cells that correlate with prognosis. Cell 162, 184-197 (2015). https://doi.org/10.1016/j.cell.2015.05.047"),
    leiden = c("leiden", "Traag, V.A., Waltman, L. & van Eck, N.J. From Louvain to Leiden: guaranteeing well-connected communities. Scientific Reportsvolume 9, Article number: 5233 (2019). https://doi.org/10.1038/s41598-019-41695-z" ),
    bpcells = c("BPCells", "Parks, B & Abdi, I. BPCells: Single Cell Counts Matrices to PCA. https://bnprks.github.io/BPCells")
  )
  if (is.null(S4Vectors::metadata(cds)$citations) | citation_key == "Monocle") {
    S4Vectors::metadata(cds)$citations <- data.frame(method = c("Monocle", "Monocle", "Monocle"),
                                          citations = c("Trapnell C. et. al. The dynamics and regulators of cell fate decisions are revealed by pseudotemporal ordering of single cells. Nat. Biotechnol. 32, 381-386 (2014). https://doi.org/10.1038/nbt.2859",
                                                        "Qiu, X. et. al. Reversed graph embedding resolves complex single-cell trajectories. Nat. Methods 14, 979-982 (2017). https://doi.org/10.1038/nmeth.4402",
                                                        "Cao, J. et. al. The single-cell transcriptional landscape of mammalian organogenesis. Nature 566, 496-502 (2019). https://doi.org/10.1038/s41586-019-0969-x"))
  }
  S4Vectors::metadata(cds)$citations <- rbind(S4Vectors::metadata(cds)$citations,
                                   data.frame(method = citation_map[[citation_key]][1],
                                              citations = citation_map[[citation_key]][2]))
  cds
}

#' Access citations for methods used during analysis.
#'
#' @param cds The cds object to access citations from.
#'
#' @return A data frame with the methods used and the papers to be cited.
#' @export
#'
#' @examples {
#'   \dontrun{
#'      get_citations(cds)
#'   }
#' }
get_citations <- function(cds) {
  message("Your analysis used methods from the following recent work. ",
                "Please cite them wherever you are presenting your analyses.")
  if(is.null(S4Vectors::metadata(cds)$citations)) {
    cds <- add_citation(cds, "Monocle")
  }
  S4Vectors::metadata(cds)$citations
}


# Make a unique identifier string.
get_unique_id <- function(object=NULL) {
  if(!is.null(object)) {
    object_dim <- dim(object)
    if(!methods::is(object, 'IterableMatrix')) {
      object_checksum <- digest::digest(object)
    }
    else {
      object_checksum <- BPCells::checksum(object)
    }
    if(!is.null(object_dim))
      object_id <- list(checksum=object_checksum, dim=object_dim)
    else
      object_id <- list(checksum=object_checksum, dim=length(object))
  }
  else {
    id_count <- get_global_variable('id_count')
    rtime <- as.numeric(Sys.time()) * 100000 + id_count
    object_id <- openssl::md5(as.character(rtime))
    id_count <- id_count + 1
    set_global_variable('id_count', id_count)
  }

  return(object_id)
}


# Get current time stamp.
get_time_stamp <- function() {
  time_stamp <- format(Sys.time(), "%Y%m%d:%H%M%S" )
  return(time_stamp)
}


# Manage parallel processing for matrix multiplication.
matrix_multiply_multicore <- function(mat_a, mat_b, cores=1L) {
  assertthat::assert_that(is.matrix(mat_a) || is_sparse_matrix(mat_a) || methods::is(mat_a, 'DelayedMatrix'),
    msg=paste0('mat_a must be either a matrix or a sparse matrix'))
  assertthat::assert_that(is.matrix(mat_b) || is_sparse_matrix(mat_b) || methods::is(mat_b, 'DelayedMatrix'),
    msg=paste0('mat_b must be either a matrix or a sparse matrix'))

  if(cores > 1) {
    omp_num_threads <- get_global_variable('omp_num_threads')
    blas_num_threads <- get_global_variable('blas_num_threads')
  
    RhpcBLASctl::omp_set_num_threads(1L)
    RhpcBLASctl::blas_set_num_threads(1L)
  
    DelayedArray::setAutoBPPARAM(BPPARAM=BiocParallel::MulticoreParam(workers=as.integer(cores)))
  
    mat_c <- mat_a %*% mat_b
  
    DelayedArray::setAutoBPPARAM(BPPARAM=BiocParallel::SerialParam())
  
    RhpcBLASctl::omp_set_num_threads(as.integer(omp_num_threads))
    RhpcBLASctl::blas_set_num_threads(as.integer(blas_num_threads))
  } else {
    mat_c <- mat_a %*% mat_b
  }

  return(mat_c)
}


# Return the call stack as a character vector.
get_call_stack <- function () {
  cv<-as.vector(sys.calls())
  lcv <- length(cv)
  n <- lcv - 1

  ocv <- vector()
  for(i in seq(1,n,1)) {
    elem <- stringr::str_split(as.character(cv[i]), '[(]', n=2)[[1]][[1]]
    ocv <- c(ocv, elem)
  }

  return(ocv)
}


# Return the call stack as a character string.
get_call_stack_as_string <- function() {
  cs <- get_call_stack()
  scs <- ''
  for(i in seq(length(cs)-1)) {
    csep <- ifelse(i == 1, '', ' => ')
    scs <- sprintf("%s%s%s()", scs, csep, cs[[i]])
  }

  return(scs)
}


# Return the name of object as a string
object_name_to_string <- function( object ) {
  str <- deparse(substitute(object))

  return( str )
}


stop_no_noise <- function() {
  opt <- options(show.error.messages = FALSE)
  on.exit(options(opt))
  stop()
}


# number of tasks per block for multicore processing
# task vector limits
#   vbeg <- c(0,cumsum(tasks_per_block(11,3)))[1:3]+1
#   vend <- cumsum(tasks_per_block(11,3))
tasks_per_block <- function(ntask=NULL, nblock=NULL) {
  tasks_block <- rep(trunc(ntask/nblock), nblock)
  remain <- ntask %% nblock
  if(remain)
    for(i in seq(remain)) 
      tasks_block[i] <- tasks_block[i] + 1
  return(tasks_block)
}


#
# Initialize Monocle3 timer.
#
tick <- function(msg="") {
  set_global_variable('monocle3_timer_t0', Sys.time())
  set_global_variable('monocle3_timer_msg', msg)
}

#
# Return time elapsed since call to tick.
#
tock <- function() {
  t1 <- Sys.time()
  t0 <- get_global_variable('monocle3_timer_t0')
  msg <- get_global_variable('monocle3_timer_msg')
  if(length(msg) > 0) {
    message(sprintf('%s %.2f seconds.',msg, difftime(t1, t0, units = "secs")))
  }
  else {
    return(t1 - t0)
  }
}


#
# Convert octal file permission to 'rwx' string.
#
file_permission_o2rws <- function(iperm) {
  assertthat::assert_that(is.integer(iperm) && iperm >= 0 && iperm <= 7,
                          msg=paste("file_permission_o2rws: iperm must be between 0L and 7L, inclusive."))

  cnv_vec <- c('---', '--x', '-w-', '-wx',
               'r--', 'r-x', 'rw-', 'rwx')
  return(cnv_vec[iperm+1])
}


#
# Convert octal string to multiple rwx strings.
#
file_permission_os2rwx <- function(ostr) {
  assertthat::assert_that(assertthat::is.string(ostr),
                          msg=paste("file_permission_os2rwx: ostr be a string."))
  assertthat::assert_that(nchar(ostr) >= 3 && nchar(ostr) <= 4,
                          msg=paste("file_permission_os2rwx: ostr must have 3 or 4 characters."))

  # Drop the first of four characters. We are not interested in SUID, SGID, or Sticky Bit.
  if(length(ostr) == 4) {
    ostr <- substr(ostr, 2,4)
  }

  ocvec <- unlist(strsplit(ostr, ''))
  return(paste('owner:', file_permission_o2rws(as.integer(ocvec[1])),
               'group:', file_permission_o2rws(as.integer(ocvec[2])),
               'world:', file_permission_o2rws(as.integer(ocvec[3]))))
}


#
# Report file/directory status.
#
# Notes:
#   The arguments consists of one or more file or
#   directory paths given as strings.
#
#   Examples:
#     report_path_status('/Users/monocle_dev/git/monocle3')
#     report_path_status('monocle_objects.20240426', 'monocle_transform_models.20240426')
#
report_path_status <- function(...) {
  path_list <- list(...)
  npath <- length(path_list)
  msg <- 'File and directory information:\n'
  for(i in seq(npath)) {
    if(i > 1) {
      msg <- paste0(msg, '\n\n')
    }
    path <- path_list[[i]]
    msg <- paste0(msg, '  input path: ', path)

    normalized_path <- normalizePath(path, mustWork=FALSE)
    msg <- paste0(msg, '\n  normalized path: ', normalized_path)
    pmod <- file.info(normalized_path, TRUE)
    # Does path exist?
    if(!file.exists(normalized_path)) {
      if(is.na(pmod[['size']][1])) {
        msg <- paste0(msg, '\n  \'', normalized_path, '\' does not exist')
        return(msg)
      }
    }

    # Is path a file or directory?
    if(pmod[['isdir']][1]) {
      msg <- paste0(msg, '\n  directory ')
    }
    else {
      msg <- paste0(msg, '\n  file ')
    }

    # What are path permissions?
     msg <- paste0(msg, 'has permissions (', pmod[['mode']][1], '): ', file_permission_os2rwx(as.character(pmod[['mode']][1])))

    # If path is a file, report md5 checksum.
    if(!pmod[['isdir']][1]) {
      msg <- paste0(msg, '\n  md5sum: ', tools::md5sum(normalized_path))
    }

  }
  return(msg)
}


report_checksum_difference <- function(calling_function_name, file_name, checksum_current, checksum_saved) {
  paste0(calling_function_name, ': inconsistent checksum values for \'', file_name, '\'\n', '  saved checksum:   ', checksum_saved, '\n', '  current checksum: ', checksum_current)
}
cole-trapnell-lab/monocle3 documentation built on June 11, 2025, 11:22 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
cole-trapnell-lab/monocle3
Clustering, Differential Expression, and Trajectory Analysis for Single-Cell RNA-Seq

R/utils.R
In cole-trapnell-lab/monocle3: Clustering, Differential Expression, and Trajectory Analysis for Single-Cell RNA-Seq

Documented in clear_cds_slots combine_cds detect_genes estimate_size_factors get_citations mc_es_apply normalized_counts

R Package Documentation

Browse R Packages

We want your feedback!

cole-trapnell-lab/monocle3 Clustering, Differential Expression, and Trajectory Analysis for Single-Cell RNA-Seq

R/utils.R In cole-trapnell-lab/monocle3: Clustering, Differential Expression, and Trajectory Analysis for Single-Cell RNA-Seq

Documented in clear_cds_slots combine_cds detect_genes estimate_size_factors get_citations mc_es_apply normalized_counts

R Package Documentation

Browse R Packages

We want your feedback!

cole-trapnell-lab/monocle3
Clustering, Differential Expression, and Trajectory Analysis for Single-Cell RNA-Seq

R/utils.R
In cole-trapnell-lab/monocle3: Clustering, Differential Expression, and Trajectory Analysis for Single-Cell RNA-Seq