R/io.R

Defines functions load_monocle_rds load_monocle_objects save_monocle_objects test_hdf5_assays load_transform_models save_transform_models report_files_saved load_umap_nn_indexes save_umap_nn_indexes load_hnsw_index save_hnsw_index load_umap_annoy_index save_umap_annoy_index load_annoy_index save_annoy_index update_hnsw_index update_annoy_index load_mtx_data load_mm_data load_annotations_data is_matrix_market_file load_worm_l2 load_worm_embryo load_a549

Documented in load_a549 load_mm_data load_monocle_objects load_mtx_data load_transform_models load_worm_embryo load_worm_l2 save_monocle_objects save_transform_models

#' Build a small cell_data_set.
#'
#' @return cds object
#' @examples
#'   \donttest{
#'     cds <- load_a549()
#'   }
#'
#' @export
load_a549 <- function(){
  small_a549_colData_df <- readRDS(system.file("extdata",
          "small_a549_dex_pdata.rda",
          package = "monocle3"))
  small_a549_rowData_df <- readRDS(system.file("extdata",
          "small_a549_dex_fdata.rda",
          package = "monocle3"))
  small_a549_exprs <- readRDS(system.file("extdata",
          "small_a549_dex_exprs.rda",
          package = "monocle3"))
  small_a549_exprs <- small_a549_exprs[,row.names(small_a549_colData_df)]

  cds <- new_cell_data_set(expression_data = small_a549_exprs,
      cell_metadata = small_a549_colData_df,
      gene_metadata = small_a549_rowData_df)
  cds
}


#' Build a cell_data_set from C. elegans embryo data.
#' @return cds object
#' @importFrom SingleCellExperiment counts
#' @export
load_worm_embryo <- function(){
  expression_matrix <- readRDS(url("https://depts.washington.edu:/trapnell-lab/software/monocle3/celegans/data/packer_embryo_expression.rds"))
  cell_metadata <- readRDS(url("https://depts.washington.edu:/trapnell-lab/software/monocle3/celegans/data/packer_embryo_colData.rds"))
  gene_annotation <- readRDS(url("https://depts.washington.edu:/trapnell-lab/software/monocle3/celegans/data/packer_embryo_rowData.rds"))
  gene_annotation$use_for_ordering <- NULL

  cds <- new_cell_data_set(expression_matrix,
      cell_metadata = cell_metadata,
      gene_metadata = gene_annotation)
  cds <- estimate_size_factors(cds)

  cds <- initialize_counts_metadata(cds)
  matrix_id <- get_unique_id(counts(cds))
  cds <- set_counts_identity(cds, 'URL: https://depts.washington.edu:/trapnell-lab/software/monocle3/celegans/data/packer_embryo_expression.rds', matrix_id)

  cds
}


#' Build a cell_data_set from C. elegans L2 data.
#' @return cds object
#' @importFrom SingleCellExperiment counts
#' @export
load_worm_l2 <- function(){
  expression_matrix <- readRDS(url("https://depts.washington.edu:/trapnell-lab/software/monocle3/celegans/data/cao_l2_expression.rds"))
  cell_metadata <- readRDS(url("https://depts.washington.edu:/trapnell-lab/software/monocle3/celegans/data/cao_l2_colData.rds"))
  gene_annotation <- readRDS(url("https://depts.washington.edu:/trapnell-lab/software/monocle3/celegans/data/cao_l2_rowData.rds"))

  cds <- new_cell_data_set(expression_matrix,
      cell_metadata = cell_metadata,
      gene_metadata = gene_annotation)
  cds <- estimate_size_factors(cds)

  cds <- initialize_counts_metadata(cds)
  matrix_id <- get_unique_id(counts(cds))
  cds <- set_counts_identity(cds, 'URL: https://depts.washington.edu:/trapnell-lab/software/monocle3/celegans/data/cao_l2_expression.rds', matrix_id)

  cds
}


#' Test if a file has a Matrix Market header.
#' @param matpath Path to test file.
#' @return TRUE if matpath file has Matrix Market header.
#' @noRd
is_matrix_market_file <- function( matpath )
{
  first_line <- utils::read.table( matpath, nrows=1 )
  grepl( "%%MatrixMarket", first_line$V1 )
}


#' Load matrix dimension metadata from file.
#' @param anno_path Path to matdim annotation file.
#' @return list consisting of matdim_names, which label matrix dimension,
#' and metadata, if present in the file, which are
#' additional dimension metadata.
#' @noRd
load_annotations_data <- function( anno_path, metadata_column_names=NULL, header=FALSE, sep="", quote="\"'", annotation_type=NULL )
{
  assertthat::assert_that( ! is.null( annotation_type ) )
  tryCatch(
      {
        annotations <- utils::read.table( anno_path, header=header, sep=sep, quote=quote, stringsAsFactors=FALSE )
      }, error = function( emsg )
      {
        stop( 'load_mm_data: bad status reading ', annotation_type, ' file \'', anno_path, '\'\n  ',
            emsg, '\n',
            '  note: possible problems include the wrong filename, a missing file,\n',
            '  and incorrect file format parameters, for example \'header\', \'sep\', and \'quote\'' )
      })

  metadata = NULL
  if( .row_names_info( annotations ) < 0 )
  {
    names <- annotations[,1]
    if( ncol( annotations ) > 1 )
      metadata <- annotations[,-1,drop=FALSE]
  }
  else
  {
    names <- rownames( annotations )
    metadata <- annotations
  }

  if( ! ( is.null( metadata_column_names ) || is.null( metadata ) ) )
  {
    assertthat::assert_that( length( metadata_column_names ) == ncol( metadata ),
        msg=paste0( annotation_type,
            ' metadata column name count (',
            length( metadata_column_names ),
            ') != ',
            annotation_type,
            'annotation column count (',
            ncol( metadata ),
            ')' ) )
    colnames( metadata ) <- metadata_column_names
  }

  if( ! is.null( metadata ) )
    rownames(metadata) <- names

  list( names=names, metadata=metadata )
}


#' Load data from matrix market format files.
#'
#' @param mat_path Path to the Matrix Market .mtx matrix file. The
#' values are read and stored  as a sparse matrix with nrows and ncols,
#' as inferred from the file. Required.
#' @param feature_anno_path Path to a feature annotation file. The
#' feature_anno_path file must have nrows lines and at least one column.
#' The values in the first column label the matrix rows and each must be
#' distinct in the column. Values in additional columns are stored in
#' the cell_data_set 'gene' metadata. For gene features, we urge use of
#' official gene IDs for labels, such as Ensembl or Wormbase IDs. In this
#' case, the second column has typically a 'short' gene name. Additional
#' information such as gene_biotype may be stored in additional columns
#' starting with column 3. Required.
#' @param cell_anno_path Path to a cell annotation file. The cell_anno_path
#' file must have ncols lines and at least one column. The values in the
#' first column label the matrix columns and each must be distinct in the
#' column. Values in additional columns are stored in the cell_data_set
#' cells metadata. Required.
#' @param header Logical set to TRUE if both feature_anno_path and
#' cell_anno_path files have column headers, or set to FALSE if both
#' files do not have column headers (only these cases are supported).
#' The files may have either ncols or ncols-1 header fields. In both
#' cases, the first column is used as the matrix dimension names. The
#' default is FALSE.
#' @param feature_metadata_column_names A character vector of feature
#' metadata column names. The number of names must be one less than the
#' number of columns in the feature_anno_path file. These values
#' will replace those read from the feature_anno_path file header,
#' if present. The default is NULL.
#' @param cell_metadata_column_names A character vector of cell
#' metadata column names. The number of names must be one less than the
#' number of columns in the cell_anno_path file. These values will
#' replace those read from the cell_anno_path file header, if present.
#' The default is NULL.
#' @param quote A character string specifying the quoting characters
#' used in the feature_anno_path and cell_anno_path files. The default
#' is "\"'".
#' @param umi_cutoff UMI per cell cutoff. Columns (cells) with less
#' than umi_cutoff total counts are removed from the matrix. The
#' default is 100.
#' @param sep field separator character in the annotation files. If
#' sep = "", the separator is white space, that is, one or more spaces,
#' tabs, newlines, or carriage returns. The default is the tab
#' character for tab-separated-value files.
#'
#' @return cds object
#'
#' @section Comments:
#' * load_mm_data estimates size factors.
#'
#' @examples
#'   \donttest{
#'     pmat<-system.file("extdata", "matrix.mtx.gz", package = "monocle3")
#'     prow<-system.file("extdata", "features_c3h0.txt", package = "monocle3")
#'     pcol<-system.file("extdata", "barcodes_c2h0.txt", package = "monocle3")
#'     cds <- load_mm_data( pmat, prow, pcol,
#'                          feature_metadata_column_names =
#'                          c('gene_short_name', 'gene_biotype'), sep='' )
#'
#'     # In this example, the features_c3h0.txt file has three columns,
#'     # separated by spaces. The first column has official gene names, the
#'     # second has short gene names, and the third has gene biotypes.
#'   }
#'
#' @importFrom SingleCellExperiment counts
#' @export
load_mm_data <- function( mat_path,
    feature_anno_path,
    cell_anno_path,
    header = FALSE,
    feature_metadata_column_names = NULL,
    cell_metadata_column_names = NULL,
    umi_cutoff = 100,
    quote="\"'",
    sep="\t") {
  assertthat::assert_that(assertthat::is.readable(mat_path), msg='unable to read matrix file')
  assertthat::assert_that(assertthat::is.readable(feature_anno_path), msg='unable to read feature annotation file')
  assertthat::assert_that(assertthat::is.readable(cell_anno_path), msg='unable to read cell annotation file')
  assertthat::assert_that(is.numeric(umi_cutoff))

  feature_annotations <- load_annotations_data( feature_anno_path, feature_metadata_column_names, header, sep, quote=quote, annotation_type='features' )
  cell_annotations <- load_annotations_data( cell_anno_path, cell_metadata_column_names, header, sep, quote=quote, annotation_type='cells' )

  assertthat::assert_that( ! any( duplicated( feature_annotations$names ) ), msg='duplicate feature names in feature annotation file' )
  assertthat::assert_that( ! any( duplicated( cell_annotations$names ) ), msg='duplicate cell names in cell annotation file' )

  mat <- Matrix::readMM( mat_path )

  assertthat::assert_that( length( feature_annotations$names ) == nrow( mat ),
      msg=paste0( 'feature name count (',
          length( feature_annotations$names ),
          ') != matrix row count (',
          nrow( mat ),
          ')' ) )
  assertthat::assert_that( length( cell_annotations$names ) == ncol( mat ),
      msg=paste0( 'cell name count (',
          length( cell_annotations$names ),
          ') != matrix column count (',
          ncol( mat ),
          ')' ) )

  rownames( mat ) <- feature_annotations$names
  colnames( mat ) <- cell_annotations$names

  cds <- new_cell_data_set( mat,
      cell_metadata = cell_annotations$metadata,
      gene_metadata = feature_annotations$metadata )

  colData(cds)$n.umi <- Matrix::colSums(exprs(cds))
  cds <- cds[,colData(cds)$n.umi >= umi_cutoff]
  cds <- estimate_size_factors(cds)

  cds <- initialize_counts_metadata(cds)
  matrix_id <- get_unique_id(counts(cds))
  cds <- set_counts_identity(cds, mat_path, matrix_id)

  return( cds )
}


#' Load data from matrix market format
#'
#' @param mat_path Path to the .mtx matrix market file.
#' @param gene_anno_path Path to gene annotation file.
#' @param cell_anno_path Path to cell annotation file.
#' @param umi_cutoff UMI per cell cutoff, default is 100.
#'
#' @return cds object
#' @importFrom SingleCellExperiment counts
#'
#' @examples
#'   \donttest{
#'     pmat<-system.file("extdata", "matrix.mtx.gz", package = "monocle3")
#'     prow<-system.file("extdata", "features_c3h0.txt", package = "monocle3")
#'     pcol<-system.file("extdata", "barcodes_c2h0.txt", package = "monocle3")
#'     cds <- load_mtx_data( pmat, prow, pcol)
#'   }
#'
#' @export
load_mtx_data <- function( mat_path,
    gene_anno_path,
    cell_anno_path,
    umi_cutoff = 100) {
  assertthat::assert_that(assertthat::is.readable(mat_path))
  assertthat::assert_that(assertthat::is.readable(gene_anno_path))
  assertthat::assert_that(assertthat::is.readable(cell_anno_path))
  assertthat::assert_that(is.numeric(umi_cutoff))

  if( is_matrix_market_file( mat_path ) )
  {
    #
    # Read an feature annotation file with two tab-separated
    # columns where the second column has short gene names.
    # Read a cell annotation file with one column that has the
    # matrix row names.
    #
    cds <- load_mm_data( mat_path,
        gene_anno_path,
        cell_anno_path,
        feature_metadata_column_names=c('gene_short_name'),
        umi_cutoff=umi_cutoff,
        sep="\t" )
    return( cds )
  }

  df <- utils::read.table(mat_path, col.names = c("gene.idx", "cell.idx", "count"),
      colClasses = c("integer", "integer", "integer"))

  gene.annotations <- utils::read.table(gene_anno_path,
      col.names = c("id", "gene_short_name"),
      colClasses = c("character", "character"))

  cell.annotations <- utils::read.table(cell_anno_path, col.names = c("cell"),
      colClasses = c("character"))

  rownames(gene.annotations) <- gene.annotations$id
  rownames(cell.annotations) <- cell.annotations$cell

  # add a dummy cell to ensure that all genes are included in the matrix
  # even if a gene isn't expressed in any cell
  df <- rbind(df, data.frame(gene.idx = c(1, nrow(gene.annotations)),
          cell.idx = rep(nrow(cell.annotations) + 1, 2),
          count = c(1, 1)))

  mat <- Matrix::sparseMatrix(i = df$gene.idx, j = df$cell.idx, x = df$count)

  if(ncol(mat) == 1) {
    mat <- mat[,0, drop=FALSE]
  } else {
    if(ncol(mat) < 2) warning('bad loop: ncol(mat) < 2')
    mat <- mat[, 1:(ncol(mat)-1), drop=FALSE]
  }

  rownames(mat) <- gene.annotations$id
  colnames(mat) <- cell.annotations$cell

  cds <- new_cell_data_set(mat, cell_metadata = cell.annotations,
      gene_metadata = gene.annotations)
  colData(cds)$n.umi <- Matrix::colSums(exprs(cds))
  cds <- cds[,colData(cds)$n.umi >= umi_cutoff]
  cds <- estimate_size_factors(cds)

  cds <- initialize_counts_metadata(cds)
  matrix_id <- get_unique_id(counts(cds))
  cds <- set_counts_identity(cds, mat_path, matrix_id)

  return(cds)
}


update_annoy_index <- function(annoy) {
  if(!is.null(annoy[['nn_index']][['version']]) && annoy[['nn_index']][['version']] == 2) {
    annoy_out <- annoy
  }
  else
  if(!is.null(annoy[['nn_index']][['version']]) && annoy[['nn_index']][['version']] == 1) {
    annoy_out <- S4Vectors::SimpleList()
    annoy_out[['nn_index']] <- list()
    annoy_out[['nn_index']][['method']] <- 'annoy'
    annoy_out[['nn_index']][['annoy_index']] <- annoy[['nn_index']][['annoy_index']]
    annoy_out[['nn_index']][['version']] <- 2
    annoy_out[['nn_index']][['annoy_index_version']] <- annoy[['index_version']]
    annoy_out[['nn_index']][['metric']] <- annoy[['metric']]
    annoy_out[['nn_index']][['n_trees']] <- annoy[['n_trees']]
    annoy_out[['nn_index']][['nrow']] <- annoy[['nrow']]
    annoy_out[['nn_index']][['ncol']] <- annoy[['ncol']]
    annoy_out[['nn_index']][['checksum_rownames']] <- NA_character_
    annoy_out[['matrix_id']] <- annoy[['matrix_id']]
    annoy_out[['updated']] <- TRUE
  }
  else
  if(!is.null(annoy[['nn_index']][['type']]) && annoy[['nn_index']][['type']] == 'annoyv1') {
    annoy_out <- S4Vectors::SimpleList()
    annoy_out[['nn_index']] <- list()
    annoy_out[['nn_index']][['method']] <- 'annoy'
    annoy_out[['nn_index']][['annoy_index']] <- annoy[['nn_index']][['ann']]
    annoy_out[['nn_index']][['version']] <- 2
    annoy_out[['nn_index']][['annoy_index_version']] <- annoy[['index_version']]
    annoy_out[['nn_index']][['metric']] <- annoy[['metric']]
    annoy_out[['nn_index']][['n_trees']] <- annoy[['n_trees']]
    annoy_out[['nn_index']][['nrow']] <- annoy[['nrow']]
    annoy_out[['nn_index']][['ncol']] <- annoy[['ncol']]
    annoy_out[['nn_index']][['checksum_rownames']] <- NA_character_
    annoy_out[['matrix_id']] <- annoy[['matrix_id']]
    annoy_out[['updated']] <- TRUE
  }
  else
  if(is.null(annoy[['nn_index']][['type']])) {
    annoy_out <- S4Vectors::SimpleList()
    annoy_out[['nn_index']] <- list()
    annoy_out[['nn_index']][['method']] <- 'annoy'
    annoy_out[['nn_index']][['annoy_index']] <- annoy[['nn_index']]
    annoy_out[['nn_index']][['version']] <- 2
    annoy_out[['nn_index']][['annoy_index_version']] <- NA_character_
    annoy_out[['nn_index']][['metric']] <- annoy[['metric']]
    annoy_out[['nn_index']][['metric']] <- ifelse(!is.null(annoy[['metric']]), annoy[['metric']], NA_character_)
    annoy_out[['nn_index']][['n_trees']] <- ifelse(!is.null(annoy[['n_trees']]), annoy[['n_trees']], NA_integer_)
    annoy_out[['nn_index']][['nrow']] <- ifelse(!is.null(annoy[['n_trees']]), annoy[['n_trees']], NA_integer_)
    annoy_out[['nn_index']][['ncol']] <- ifelse(!is.null(annoy[['n_trees']]), annoy[['n_trees']], NA_integer_)
    annoy_out[['nn_index']][['checksum_rownames']] <- NA_character_
    annoy_out[['matrix_id']] <- NA_character_
    annoy_out[['updated']] <- TRUE
  }

  return(annoy_out)
}


update_hnsw_index <- function(hnsw) {
  if(!is.null(hnsw[['nn_index']][['version']]) && hnsw[['nn_index']][['version']] == 1) {
    hnsw_out <- hnsw
  }
  else
  if(!is.null(hnsw[['nn_index']][['version']]) && hnsw[['nn_index']][['version']] == 1) {
    hnsw_out <- S4Vectors::SimpleList()
    annoy_out[['nn_index']] <- list()
    hnsw_out[['nn_index']][['method']] <- 'hnsw'
    hnsw_out[['nn_index']][['hnsw_index']] <- hnsw[['nn_index']]
    hnsw_out[['nn_index']][['version']] <- 1
    hnsw_out[['nn_index']][['hnsw_index_version']] <- hnsw[['index_version']]
    hnsw_out[['nn_index']][['metric']] <- hnsw[['metric']]
    hnsw_out[['nn_index']][['M']] <- hnsw[['M']]
    hnsw_out[['nn_index']][['ef_construction']] <- hnsw[['ef_construction']]
    hnsw_out[['nn_index']][['nrow']] <- hnsw[['nrow']]
    hnsw_out[['nn_index']][['ncol']] <- hnsw[['ncol']]
    hnsw_out[['nn_index']][['checksum_rownames']] <- NA_character_
    hnsw_out[['matrix_id']] <- hnsw[['matrix_id']]
    annoy_out[['updated']] <- TRUE
  }

  return(hnsw_out)
}


# Save Annoy model files.
# Complexities
#   o  uwot annoy model object is declared to be subject to change
#   o  uwot annoy object changes some time between releases
#      0.1.8 and 0.1.10: the annoy index moved from
#      object$nn_index to object$nn_index$ann
#   o  the uwot annoy object may have more than one metric
#   o  see uwot/R/uwot.R functions save_uwot and load_uwot
# Notes
#   o  uwot 0.1.10 has nn_index$type='annoyv1', which may be a
#      umap annoy object version number. It does not exist for
#      uwot 0.1.8. nn_index$ann does not exist either.
#   o  the nn_index parameter is the nn_index object in the
#      uwot umap model and returned by uwot::annoy_build()
#         uwot v0.1.8::annoy_build() returns
#           ann <- uwot::create_ann()
#             ...
#           return ann
#         uwot v0.1.10::annoy_build() returns
#           annoy <- annoy_create(metric, nc)
#                 ...
#               ann <- annoy$ann
#                 ...
#               for (i in 1:nr) {
#                 ann$addItem(i - 1, X[i, ])
#               }
#                 ...
#           return annoy
#        where annoy is assigned to nn_index in either the
#        UMAP model or by uwot::annoy_build().
# untested code when is.null(nn_index[['type']])
save_annoy_index <- function(nn_index, file_name) {
  if(!is.null(nn_index[['version']])) {
    if(nn_index[['version']] == 1 || nn_index[['version']] == 2) {
      tryCatch( nn_index[['annoy_index']]$save(file_name),
                error = function(e) {message('Unable to save annoy index: it may not exist in this cds: error message is ', e)})
    } else {
      stop('Unrecognized Monocle3 annoy index type')
    }
  }
  else
  if(!is.null(nn_index[['type']])) {
    if(nn_index[['type']] == 'annoyv1') {
      tryCatch( nn_index[['ann']]$save(file_name),
                error = function(e) {message('Unable to save annoy index: it may not exist in this cds: error message is ', e)})
    }
    else {
      stop('Unrecognized uwot annoy index type')
    }
  }
  else {
    nn_index$save(file_name)
  }
}


# see comments for save_annoy_index
# modified for RcppAnnoy
load_annoy_index <- function(nn_index, file_name, metric, ndim) {
  if(!is.null(nn_index[['version']])) {
    if(nn_index[['version']] == 1 || nn_index[['version']] == 2) {
      annoy_index <- new_annoy_index(metric, ndim)
      tryCatch(
        {
          annoy_index$load(file_name)
        }, error = function(emsg)
        {
          stop('load_annoy_index: bad status reading annoy index file')
        }
      )
      nn_index[['annoy_index']] <- annoy_index
    }
    else {
      stop('Unrecognized annoy index type')
    }
  }
  else
  if(!is.null(nn_index[['type']])) {
    if(nn_index[['type']] == 'annoyv1') {
      annoy_index <- new_annoy_index(metric, ndim)
      tryCatch(
        {
          annoy_index$load(file_name)
        }, error = function(emsg)
        {
          stop('load_annoy_index: bad status reading annoy index file')
        }
      )
      nn_index[['ann']] <- annoy_index
    }
    else {
      stop('Unrecognized annoy index type')
    }
  }
  else {
    # Assume to be an older uwot annoy index version.
    nn_index <- new_annoy_index(metric, ndim)
    nn_index$load(file_name)
  }
  return(nn_index)
}


save_umap_annoy_index <- function(nn_index, file_name) {
  if(!is.null(nn_index[['type']])) {
    if(nn_index[['type']] == 'annoyv1') {
      tryCatch( nn_index[['ann']]$save(file_name),
                error = function(e) {message('Unable to save annoy index: it may not exist in this cds: error message is ', e)})
    }
    else {
      stop('Unrecognized umap annoy index type')
    }
  }
  else {
    nn_index$save(file_name)
  }
}


load_umap_annoy_index <- function(nn_index, file_name, metric, ndim) {
  if(!is.null(nn_index[['type']])) {
    if(nn_index[['type']] == 'annoyv1') {
      annoy_index <- new_annoy_index(metric, ndim)
      tryCatch(
        {
          annoy_index$load(file_name)
        }, error = function(emsg)
        {
          stop('load_annoy_index: bad status reading annoy index file')
        }
      )
      nn_index[['ann']] <- annoy_index
    }
    else {
      stop('Unrecognized umap annoy index type')
    }
  }
  else {
    # Assume to be an older uwot annoy index version.
    nn_index <- new_annoy_index(metric, ndim)
    tryCatch(
      {
        nn_index$load(file_name)
      }, error = function(emsg)
      {
        stop('load_annoy_index: bad status reading annoy index file')
      }
    )
  }
  return(nn_index)
}


save_hnsw_index <- function(nn_index, file_name) {
  if(is.null(nn_index)) return()

  if(!is.null(nn_index[['version']])) {
    out_index <- nn_index[['hnsw_index']]
  }
  else {
    out_index <- nn_index
  }
  tryCatch(out_index$save(file_name),
           error = function(e) {message('Unable to save hnsw index: it may not exist in this cds: error message is ', e)})
}


load_hnsw_index <- function(nn_index, file_name, metric, ndim) {
  if(metric == 'l2') {
    tryCatch(
      {
        new_index <- methods::new(RcppHNSW::HnswL2, ndim, file_name)
      }, error = function(emsg)
      {
        stop('load_hnsw_index: bad status reading hnsw index file')
      }
    )
    unlink(file_name)
  } else
  if(metric == 'euclidean') {
    tryCatch(
      {
        new_index <- methods::new(RcppHNSW::HnswL2, ndim, file_name)
      }, error = function(emsg)
      {
        stop('load_hnsw_index: bad status reading hnsw index file')
      }
    )
    attr(new_index, "distance") <- "euclidean"
    unlink(file_name)
  } else
    if(metric == 'cosine') {
    tryCatch(
      {
        new_index <- methods::new(RcppHNSW::HnswCosine, ndim, file_name)
      }, error = function(emsg)
      {
        stop('load_hnsw_index: bad status reading hnsw index file')
      }
    )
    unlink(file_name)
  } else
  if(metric == 'ip') {
    tryCatch(
      {
        new_index <- methods::new(RcppHNSW::HnswIp, ndim, file_name)
      }, error = function(emsg)
      {
        stop('load_hnsw_index: bad status reading hnsw index file')
      }
    )
    unlink(file_name)
  } else
    stop('Unrecognized HNSW metric ', metric)

  if(!is.null(nn_index[['version']]))
    nn_index[['hnsw_index']] <- new_index
  else
    nn_index <- new_index

  return(nn_index)
}


# Save umap annoy indexes to files and return md5sum
# value(s) as either a character string, in case of
# one metric, or a list, in case of more than one matric.
save_umap_nn_indexes <- function(umap_model, file_name) {
  if(is.null(umap_model[['metric']])) {
    stop('No UMAP model in this CDS.')
  }
  metrics <- names(umap_model[['metric']])
  n_metrics <- length(metrics)
  if(n_metrics == 1) {
    save_umap_annoy_index(umap_model[['nn_index']], file_name)
    md5sum_umap_index <- tools::md5sum(file_name)
  } else {
    warning('save_umap_nn_indexes is untested with more than one umap metric')
    md5sum_vec <- character()
    for(i in seq(1, n_metrics, 1)) {
      file_name_expand <- paste0(file_name, i)
      save_umap_annoy_index(umap_model[['nn_index']][[i]], file_name_expand)
      md5sum <- tools::md5sum(file_name_expand)
      append(md5sum_vec, md5sum)
    }
    md5sum_umap_index <- paste(md5sum_vec, collapse='_')
  }
  return(md5sum_umap_index)
}


# Load umap annoy indexes into umap_model and return umap_model.
load_umap_nn_indexes <- function(umap_model, file_name, md5sum_umap_index) {
  metrics <- names(umap_model[['metric']])
  n_metrics <- length(metrics)
  if(n_metrics == 1) {
    md5sum <- tools::md5sum(file_name)
    if(!is.null(md5sum_umap_index) && md5sum != md5sum_umap_index) {
      stop('The UMAP annoy index file, \'', file_name, '\', differs from the file made using the save_reduce_dimension_model() function.')
    }
    metric <- metrics[[1]]
    annoy_metric <- if(metric == 'correlation') 'cosine' else metric
    annoy_ndim <- umap_model[['metric']][[1]][['ndim']]
    umap_model[['nn_index']] <- load_umap_annoy_index(umap_model[['nn_index']], file_name, annoy_metric, annoy_ndim)
  } else {
    warning('load_umap_nn_indexes is untested with more than one umap metric')
    if(!is.null(md5sum_umap_index)) {
      md5sum_vec <- unlist(strsplit(md5sum_umap_index, '_', fixed=TRUE))
    }
    for(i in seq(1, n_metrics, 1)) {
      file_name_expand <- paste0(file_name, i)
      md5sum <- tools::md5sum(file_name_expand)
      if(!is.null(md5sum_umap_index) && md5sum != md5sum_vec[[i]]) {
        stop('The UMAP annoy index file, \'', file_name_expand, '\', differs from the file made using the save_reduce_dimension_model() function.')
      }
      metric <- metrics[[i]]
      annoy_metric <- if(metric == 'correlation') 'cosine' else metric
      annoy_ndim <- length(umap_model[['metric']][[i]])
      umap_model[['nn_index']][[i]] <- load_umap_annoy_index(umap_model[['nn_index']][[i]], file_name_expand, annoy_metric, annoy_ndim)
    }
  }
  return(umap_model)
}


#
# Report files saved.
#
report_files_saved <- function(file_index) {
  appendLF <- TRUE
  files <- file_index[['files']]
  for( i in seq_along(files[['cds_object']])) {
    cds_object <- files[['cds_object']][[i]]
    reduction_method <- files[['reduction_method']][[i]]
    if(cds_object == 'cds') {
      process <- 'cell_data_set'
      reduction_method <- 'full_cds'
    } else
    if(cds_object == 'reduce_dim_aux') {
      if(reduction_method == 'Aligned') {
        process <- 'align_cds'
      } else
      if(reduction_method == 'PCA' || reduction_method == 'LSI') {
        process <- 'preprocess_cds'
      } else
      if(reduction_method == 'tSNE' || reduction_method == 'UMAP') {
        process <- 'reduce_dimension'
      } else {
        stop('Unrecognized preprocess reduction_method \'', reduction_method, '\'')
      }
    } else {
      stop('Unrecognized cds_object value \'', files[['cds_object']][[i]], '\'')
    }
    file_format <- files[['file_format']][[i]]
    if(file_format == 'rds') {
      file_type <- 'RDS'
    } else
    if(file_format == 'hdf5') {
      file_type <- 'RDS_HDF5'
    } else
    if(file_format == 'annoy_index' || file_format == 'hnsw_index') {
      file_type <- 'NN_index'
    } else
    if(file_format == 'umap_annoy_index') {
      file_type <- 'UMAP_NN_index'
    } else {
      stop('Unrecognized file_format value \'', file_format, '\'')
    }

    file_name <- basename(files[['file_path']][[i]])
    message('  ', file_name, '  (', reduction_method, '  ', file_type, '  from  ', process, ')', appendLF=appendLF)
  }
}


#
#' Save cell_data_set transform models.
#'
#' Save the transform models in the cell_data_set to the
#' specified directory by writing the R objects to RDS
#' files and the nearest neighbor indexes to
#' index files. save_transform_models saves transform
#' models made by running the preprocess_cds and
#' reduce_dimension functions on an initial cell_data_set.
#' Subsequent cell_data_sets are transformed into the
#' reduced dimension space of the initial cell_data_set by
#' loading the new data into a new cell_data_set, loading
#' the initial data set transform models into the new
#' cell_data_set using the load_transform_models function,
#' and applying those transform models to the new data set
#' using the preprocess_transform and
#' reduce_dimension_transform functions. In this case, do
#' not run the preprocess_cds or reduce_dimension
#' functions on the new cell_data_set. Additionally,
#' save_transform_models saves nearest neighbor indexes
#' when the preprocess_cds and reduce_dimension
#' functions are run with the make_nn_index=TRUE parameter.
#' These indexes are used to find matches between cells in
#' the new processed cell_data_set and the initial
#' cell_data_set using index search functions. For more
#' information see the help for transfer_cell_labels.
#' save_transform_models saves the models to a directory
#' given by directory_path.
#'
#' @param cds a cell_data_set with existing models.
#' @param directory_path a string giving the name of the directory
#'   in which to write the model files.
#' @param comment a string with optional notes that is saved with
#'   the objects.
#' @param verbose a boolean determining whether to print information
#'   about the saved files.
#'
#' @return none.
#'
#' @examples
#'   \dontrun{
#'     cds <- load_a549()
#'     cds <- preprocess_cds(cds)
#'     cds <- reduce_dimension(cds)
#'     save_transform_models(cds, 'tm')
#'   }
#'
#' @export
# Bioconductor forbids writing to user directories so examples
# is not run.
save_transform_models <- function( cds, directory_path, comment="", verbose=TRUE) {
  appendLF <- TRUE
  # file information is written to an RDS file
  # in directory_path
  #   cds_object: reduce_dim_aux
  #   reduction_method: PCA | LSI | Aligned | UMAP ...
  #   nn_method: annoy | hnsw
  #   object_spec: ex: cds@reduce_dim_aux[[reduction_method]]
  #   file_format: rds | annoy_index | umap_annoy_index | hnsw_index
  #   file_path: path within directory_path (need file name)
  #   file_md5sum: md5sum of file(s)
  file_index <- list( 'save_function' = 'save_transform_models',
                      'archive_date' = Sys.time(),
                      'r_version' = R.Version()$version.string,
                      'uwot_version' = utils::packageVersion('uwot'),
                      'hnsw_version' = utils::packageVersion('RcppHNSW'),
                      'monocle_version' = utils::packageVersion('monocle3'),
                      'cds_version' = S4Vectors::metadata(cds)$cds_version,
                      'archive_version' = get_global_variable('transform_models_version'),
                      'directory' = directory_path,
                      'comment' = comment,
                      'files' = data.frame(cds_object = character(0),
                                           reduction_method = character(0),
                                           object_spec = character(0),
                                           file_format = character(0),
                                           file_path = character(0),
                                           file_md5sum = character(0),
                                           stringsAsFactors = FALSE))

  # Gather reduce_dimension reduction_methods and whether each has an annoy index.
  methods_reduce_dim <- list()
  for( reduction_method in names(cds@reduce_dim_aux)) {
    methods_reduce_dim[[reduction_method]] <- list()
    methods_reduce_dim[[reduction_method]][['rds_path']] <- paste0('rdd_', tolower(reduction_method), '_transform_model.rds')

    methods_reduce_dim[[reduction_method]][['annoy_index_path']] <- paste0('rdd_', tolower(reduction_method), '_transform_model_annoy.idx')
    methods_reduce_dim[[reduction_method]][['hnsw_index_path']] <- paste0('rdd_', tolower(reduction_method), '_transform_model_hnsw.idx')

    if(reduction_method == 'UMAP') {
      if(!is.null(cds@reduce_dim_aux[[reduction_method]][['model']][['umap_model']][['nn_index']])){
        methods_reduce_dim[[reduction_method]][['has_model_index']] <- TRUE
        methods_reduce_dim[[reduction_method]][['umap_index_path']] <- paste0('rdd_', tolower(reduction_method), '_transform_model_umap.idx')
      }
      else
        methods_reduce_dim[[reduction_method]][['has_model_index']] <- FALSE
    }

    if(!is.null(cds@reduce_dim_aux[[reduction_method]][['nn_index']][['annoy']][['nn_index']]))
      methods_reduce_dim[[reduction_method]][['has_annoy_index']] <- TRUE
    else
      methods_reduce_dim[[reduction_method]][['has_annoy_index']] <- FALSE

    if(!is.null(cds@reduce_dim_aux[[reduction_method]][['nn_index']][['hnsw']][['nn_index']]))
      methods_reduce_dim[[reduction_method]][['has_hnsw_index']] <- TRUE
    else
      methods_reduce_dim[[reduction_method]][['has_hnsw_index']] <- FALSE

  }

  # Make directory if necessary.
  dir.create(path = directory_path, showWarnings=FALSE, recursive=TRUE, mode='0700')

  # Remove files, if they exist.
  for(reduction_method in names(methods_reduce_dim)) {
    if(file.exists(file.path(directory_path, methods_reduce_dim[[reduction_method]][['rds_path']])))
      file.remove(file.path(directory_path, methods_reduce_dim[[reduction_method]][['rds_path']]))

    if(file.exists(file.path(directory_path, methods_reduce_dim[[reduction_method]][['annoy_index_path']])))
      file.remove(file.path(directory_path, methods_reduce_dim[[reduction_method]][['annoy_index_path']]))

    if(file.exists(file.path(directory_path, methods_reduce_dim[[reduction_method]][['hnsw_index_path']])))
      file.remove(file.path(directory_path, methods_reduce_dim[[reduction_method]][['hnsw_index_path']]))

    if(reduction_method == 'UMAP') {
      if(methods_reduce_dim[[reduction_method]][['has_model_index']]) {
        if(file.exists(file.path(directory_path, methods_reduce_dim[[reduction_method]][['umap_index_path']])))
           file.remove(file.path(directory_path, methods_reduce_dim[[reduction_method]][['umap_index_path']]))
      }
    }
  }

  # Save reduce_dimension annoy indexes.
  # Notes:
  #   o  save RDS files before the corresponding index files in
  #      order to enable loading.
  #
  for(reduction_method in names(methods_reduce_dim)) {
    tryCatch(
      {
        saveRDS(cds@reduce_dim_aux[[reduction_method]], file=file.path(directory_path, methods_reduce_dim[[reduction_method]][['rds_path']]))
      },
      error = function(cond) {
                     message('problem writing file \'', file.path(directory_path, methods_reduce_dim[[reduction_method]][['rds_path']]), '\': ', cond, appendLF=appendLF)
                     return(NULL)
      },
      finally = {
        md5sum <- tools::md5sum(file.path(directory_path, methods_reduce_dim[[reduction_method]][['rds_path']]))
        file_index[['files']] <- rbind(file_index[['files']],
                                       data.frame(cds_object = 'reduce_dim_aux',
                                                  reduction_method = reduction_method,
                                                  object_spec = object_name_to_string(cds@reduce_dim_aux[[reduction_method]]),
                                                  file_format = 'rds',
                                                  file_path = methods_reduce_dim[[reduction_method]][['rds_path']],
                                                  file_md5sum = md5sum,
                                                  stringsAsFactors = FALSE))
    })
    if(methods_reduce_dim[[reduction_method]][['has_annoy_index']]) {
      tryCatch(
        {
          save_annoy_index(cds@reduce_dim_aux[[reduction_method]][['nn_index']][['annoy']][['nn_index']], file.path(directory_path, methods_reduce_dim[[reduction_method]][['annoy_index_path']]))
        },
        error = function(cond) {
                       message('problem writing file \'', file.path(directory_path, methods_reduce_dim[[reduction_method]][['annoy_index_path']]), '\': ', cond, appendLF=appendLF)
                       return(NULL)
        },
        finally = {
          md5sum <- tools::md5sum(file.path(directory_path, methods_reduce_dim[[reduction_method]][['annoy_index_path']]))
          file_index[['files']] <- rbind(file_index[['files']],
                                         data.frame(cds_object = 'reduce_dim_aux',
                                                    reduction_method = reduction_method,
                                                    object_spec = object_name_to_string(cds@reduce_dim_aux[[reduction_method]][['nn_index']][['annoy']][['nn_index']]),
                                                    file_format = 'annoy_index',
                                                    file_path = methods_reduce_dim[[reduction_method]][['annoy_index_path']],
                                                    file_md5sum = md5sum,
                                                    stringsAsFactors = FALSE))
        })
    }

    if(methods_reduce_dim[[reduction_method]][['has_hnsw_index']]) {
      tryCatch(
        {
          save_hnsw_index(cds@reduce_dim_aux[[reduction_method]][['nn_index']][['hnsw']][['nn_index']], file.path(directory_path, methods_reduce_dim[[reduction_method]][['hnsw_index_path']]))
        },
        error = function(cond) {
                       message('problem writing file \'', file.path(directory_path, methods_reduce_dim[[reduction_method]][['hnsw_index_path']]), '\': ', cond, appendLF=appendLF)
                       return(NULL)
        },
        finally = {
          md5sum <- tools::md5sum(file.path(directory_path, methods_reduce_dim[[reduction_method]][['hnsw_index_path']]))
          file_index[['files']] <- rbind(file_index[['files']],
                                         data.frame(cds_object = 'reduce_dim_aux',
                                                    reduction_method = reduction_method,
                                                    object_spec = object_name_to_string(cds@reduce_dim_aux[[reduction_method]][['nn_index']][['hnsw']][['nn_index']]),
                                                    file_format = 'hnsw_index',
                                                    file_path = methods_reduce_dim[[reduction_method]][['hnsw_index_path']],
                                                    file_md5sum = md5sum,
                                                    stringsAsFactors = FALSE))
        })
    }
    if(reduction_method == 'UMAP' && methods_reduce_dim[[reduction_method]][['has_model_index']]) {
      tryCatch(
        {
          md5sum <- save_umap_nn_indexes(cds@reduce_dim_aux[[reduction_method]][['model']][['umap_model']], file.path(directory_path, methods_reduce_dim[[reduction_method]][['umap_index_path']]))
        },
        error = function(cond) {
                       message('problem writing file \'', file.path(directory_path, methods_reduce_dim[[reduction_method]][['umap_index_path']]), '\': ', cond, appendLF=appendLF)
                       return(NULL)
        },
        finally = {
          file_index[['files']] <- rbind(file_index[['files']],
                                         data.frame(cds_object = 'reduce_dim_aux',
                                                    reduction_method = reduction_method,
                                                    object_spec = object_name_to_string(cds@reduce_dim_aux[[reduction_method]][['model']][['umap_model']]),
                                                    file_format = 'umap_annoy_index',
                                                    file_path = methods_reduce_dim[[reduction_method]][['umap_index_path']],
                                                    file_md5sum = md5sum,
                                                    stringsAsFactors = FALSE))
        })
    }
  }

  # Save file_index.rds.
  saveRDS(file_index, file=file.path(directory_path, 'file_index.rds'))

  if(verbose) {
    report_files_saved(file_index)
  }
}


#' Load transform models into a cell_data_set.
#'
#' Load transform models, which were saved using save_transform_models,
#' into a cell_data_set. This function over-writes existing models in
#' the cell_data_set. For more information read the help information
#' for save_transform_models.
#'
#' @param cds a cell_data_set to be transformed using the models.
#' @param directory_path a string giving the name of the directory
#'   from which to read the model files.
#'
#' @return a cell_data_set with the transform models loaded by
#'   load_transform_models.
#'
#' @examples
#'   \dontrun{
#'     cds <- load_a549()
#'     cds <- preprocess_cds(cds)
#'     cds <- reduce_dimension(cds)
#'     save_transform_models(cds, 'tm')
#'     cds1 <- load_a549()
#'     cds1 <- load_transform_models(cds1, 'tm')
#'   }
#' @export
# Bioconductor forbids writing to user directories so examples
# is not run.
load_transform_models <- function(cds, directory_path) {
  appendLF <- TRUE
  # Check for directory.
  if(!file.exists(directory_path))
    stop('Directory \'', directory_path, '\' does not exist.')

  # Check for file_index.rds.
  file_index_path <- file.path(directory_path, 'file_index.rds')
  if(!file.exists(file_index_path))
    stop('Missing file index file \'', file_index_path, '\'')

  # Read file index.
  file_index <- tryCatch(
    {
      readRDS(file_index_path)
    },
    error = function(cond) {
              message('problem reading file \'', file_index_path, '\': ', cond, appendLF=appendLF);
              return(NULL)
    }
  )

  # Check that this is a save_transform_models archive.
  if(file_index[['save_function']] != 'save_transform_models') {
    stop('The files in ', directory_path, ' are not from save_transform_models.')
  }

  # Write stored comment field.
  if(length(file_index[['comment']]) > 1) {
    message('File comment: ', file_index[['comment']], appendLF=appendLF)
  }

  # Loop through the files in file_index.rds in order
  # to restore objects.
  for(ifile in seq_along(file_index[['files']][['cds_object']])) {
    file_path <- file.path(directory_path, file_index[['files']][['file_path']][[ifile]])
    file_format <- file_index[['files']][['file_format']][[ifile]]
    cds_object <- file_index[['files']][['cds_object']][[ifile]]
    reduction_method <- file_index[['files']][['reduction_method']][[ifile]]
    md5sum <- file_index[['files']][['file_md5sum']][[ifile]]

    if(!file.exists(file_path))
      stop('load_transform_models: missing file \'', file_path, '\'')

    #
    # For UWOT UMAP annoy index, the function load_umap_nn_indexes
    # checks md5sums internally so don't check here.
    #
    md5sum_file <- tools::md5sum(file_path)
    if(!(cds_object == 'reduce_dim_aux' &&
         reduction_method == 'UMAP' &&
         file_format == 'umap_nn_index' &&
         nchar(md5sum) > 32)) {
      if(is.na(md5sum_file) || (md5sum_file != md5sum)) {
        stop('md5sum mismatch for file \'', file_path, '\'')
      }
    }

    #
    # Note:
    #   o  expect that the RDS file for a reduction_method
    #      appears before index files for the reduction_method
    #
    if(cds_object == 'reduce_dim_aux') {
      if(file_format == 'rds') {
        cds@reduce_dim_aux[[reduction_method]] <- tryCatch(
          {
            readRDS(file_path)
          },
          error = function(cond) {
            message('problem reading file \'', file_path, '\'', appendLF=appendLF)
            return(NULL)
          })
      } else
      if(file_format == 'annoy_index') {
        cds@reduce_dim_aux[[reduction_method]][['nn_index']][['annoy']] <- update_annoy_index(cds@reduce_dim_aux[[reduction_method]][['nn_index']][['annoy']])

        metric <- cds@reduce_dim_aux[[reduction_method]][['nn_index']][['annoy']][['nn_index']][['metric']]
        ncolumn <- cds@reduce_dim_aux[[reduction_method]][['nn_index']][['annoy']][['nn_index']][['ncol']]
        cds@reduce_dim_aux[[reduction_method]][['nn_index']][['annoy']][['nn_index']] <- tryCatch(
          {
            load_annoy_index(cds@reduce_dim_aux[[reduction_method]][['nn_index']][['annoy']][['nn_index']], file_path, metric, ncolumn)
          },
          error = function(cond) {
            message('problem reading file \'', file_path, '\'', appendLF=appendLF)
            return(NULL)
          })
      } else
      if(file_format == 'hnsw_index') {
        cds@reduce_dim_aux[[reduction_method]][['nn_index']][['hnsw']] <- update_hnsw_index(cds@reduce_dim_aux[[reduction_method]][['nn_index']][['hnsw']])

        metric <- cds@reduce_dim_aux[[reduction_method]][['nn_index']][['hnsw']][['nn_index']][['metric']]
        ncolumn <- cds@reduce_dim_aux[[reduction_method]][['nn_index']][['hnsw']][['nn_index']][['ncol']]
        cds@reduce_dim_aux[[reduction_method]][['nn_index']][['hnsw']][['nn_index']] <- tryCatch(
          {
            load_hnsw_index(cds@reduce_dim_aux[[reduction_method]][['nn_index']][['hnsw']][['nn_index']], file_path, metric, ncolumn)
          },
          error = function(cond) {
            message('problem reading file \'', file_path, '\'', appendLF=appendLF)
            return(NULL)
          })
      } else
      if(reduction_method == 'UMAP' && file_format == 'umap_annoy_index') {
        cds@reduce_dim_aux[[reduction_method]][['model']][['umap_model']] <- tryCatch(
          {
            load_umap_nn_indexes(cds@reduce_dim_aux[[reduction_method]][['model']][['umap_model']], file_path, md5sum)
          },
          error = function(cond) {
            message('problem reading file \'', file_path, '\'', appendLF=appendLF)
            return(NULL)
         })
      } else {
        stop('Unrecognized file format value \'', file_format, '\'')
      }
      cds <- set_model_identity_path(cds, reduction_method, directory_path)
    } else {
      stop('Unrecognized cds_object value \'', cds_object, '\'')
    }
  }

  return(cds)
}


#
# Check cds assays for HDF5Array objects.
#
#' @importFrom S4Vectors getListElement
test_hdf5_assays <- function(cds) {
  assays <- assays(cds)
  for( idx in seq_along(assays)) {
    asyl <- getListElement(assays, idx)
    hdf5_test <- unlist(DelayedArray::seedApply(asyl, methods::is, "HDF5ArraySeed"))
    if(any(unlist(hdf5_test))) return(TRUE)
  }
  FALSE
}


#
#' Save a Monocle3 full cell_data_set.
#'
#' Save a Monocle3 full cell_data_set to a specified directory
#' by writing the R objects to RDS files and the nearest
#' neighbor indexes to index files. The assays
#' objects are saved as HDF5Array files when hdf5_assays=TRUE
#' or when the cell_data_set assays are HDF5Array objects. If
#' any assay in the cell_data set is an HDF5 object, all assays
#' must be. When save_monocle_objects is run with hdf5_assays=TRUE,
#' the load_monocle_objects function loads the saved assays into
#' HDF5Array objects in the resulting cell_data_set. Note:
#' operations such as preprocess_cds that are run on assays stored
#' as HDF5Arrays are much, much slower than the same operations
#' run on assays stored as in-memory matrices. You may want to
#' investigate parameters related to the Bioconductor DelayedArray
#' and BiocParallel packages in this case.
#'
#' @param cds a cell_data_set to save.
#' @param directory_path a string giving the name of the directory
#'   in which to write the object files.
#' @param hdf5_assays a boolean determining whether the
#'   non-HDF5Array assay objects are saved as HDF5 files. At this
#'   time cell_data_set HDF5Array assay objects are stored as
#'   HDF5Assay files regardless of the hdf5_assays parameter value.
#' @param comment a string with optional notes that is saved with
#'   the objects.
#' @param verbose a boolean determining whether to print information
#'   about the saved files.
#' @return none.
#'
#' @examples
#'   \dontrun{
#'     cds <- load_a549()
#'     save_monocle_objects(cds, 'mo')
#'   }
#'
#' @export
# Bioconductor forbids writing to user directories so examples
# is not run.
save_monocle_objects <- function(cds, directory_path, hdf5_assays=FALSE, comment="", verbose=TRUE) {
  appendLF <- TRUE
  # file information is written to an RDS file
  # in directory_path
  #   cds_object: cds | reduce_dim_aux
  #   reduction_method: PCA | LSI | Aligned | tSNE | UMAP  ...
  #   nn_method: annoy | hnsw
  #   object_spec: ex: cds@reduce_dim_aux[[reduction_method]]
  #   file_format: rds | annoy_index | umap_annoy_index | hnsw_index
  #   file_path: path within directory_path (need file name)
  #   file_md5sum: md5sum of file(s)
  file_index <- list( 'save_function' = 'save_monocle_objects',
                      'archive_date' = Sys.time(),
                      'r_version' = R.Version()$version.string,
                      'uwot_version' = utils::packageVersion('uwot'),
                      'hnsw_version' = utils::packageVersion('RcppHNSW'),
                      'hdf5array_version' = utils::packageVersion('HDF5Array'),
                      'monocle_version' = utils::packageVersion('monocle3'),
                      'cds_version' = S4Vectors::metadata(cds)$cds_version,
                      'archive_version' = get_global_variable('monocle_objects_version'),
                      'directory' = directory_path,
                      'comment' = comment,
                      'files' = data.frame(cds_object = character(0),
                                           reduction_method = character(0),
                                           object_spec = character(0),
                                           file_format = character(0),
                                           file_path = character(0),
                                           file_md5sum = character(0),
                                           stringsAsFactors = FALSE))

  # Save assays as HDF5Array objects?
  hdf5_assay_flag <- hdf5_assays || test_hdf5_assays(cds)

  # Path of cds object file.
  rds_path <- 'cds_object.rds'
  hdf5_path <- 'hdf5_object'

  # Gather reduce_dimension reduction_method names for which indexes exist.
  methods_reduce_dim <- list()
  for(reduction_method in names(cds@reduce_dim_aux)) {
    methods_reduce_dim[[reduction_method]] <- list()

    methods_reduce_dim[[reduction_method]][['annoy_index_path']] <- paste0('rdd_', tolower(reduction_method), '_transform_model_annoy.idx')
    methods_reduce_dim[[reduction_method]][['hnsw_index_path']] <- paste0('rdd_', tolower(reduction_method), '_transform_model_hnsw.idx')

    if(!is.null(cds@reduce_dim_aux[[reduction_method]][['nn_index']][['annoy']][['nn_index']]))
      methods_reduce_dim[[reduction_method]][['has_annoy_index']] <- TRUE
    else
      methods_reduce_dim[[reduction_method]][['has_annoy_index']] <- FALSE

    if(!is.null(cds@reduce_dim_aux[[reduction_method]][['nn_index']][['hnsw']][['nn_index']]))
      methods_reduce_dim[[reduction_method]][['has_hnsw_index']] <- TRUE
    else
      methods_reduce_dim[[reduction_method]][['has_hnsw_index']] <- FALSE

    if(reduction_method == 'UMAP') {
      if(!is.null(cds@reduce_dim_aux[[reduction_method]][['model']][['umap_model']][['nn_index']])){
        methods_reduce_dim[[reduction_method]][['has_model_index']] <- TRUE
        methods_reduce_dim[[reduction_method]][['umap_index_path']] <- paste0('rdd_', tolower(reduction_method), '_transform_model_umap.idx')
      }
      else
        methods_reduce_dim[[reduction_method]][['has_model_index']] <- FALSE
    }
  }

  # Make directory if necessary.
  dir.create(path = directory_path, showWarnings=FALSE, recursive=TRUE, mode='0700')

  # Remove files, if they exist.
  for(reduction_method in names(methods_reduce_dim)) {
    if(file.exists(file.path(directory_path, rds_path)))
      file.remove(file.path(directory_path, rds_path))

    if(file.exists(file.path(directory_path, methods_reduce_dim[[reduction_method]][['annoy_index_path']])))
       file.remove(file.path(directory_path, methods_reduce_dim[[reduction_method]][['annoy_index_path']]))

    if(file.exists(file.path(directory_path, methods_reduce_dim[[reduction_method]][['hnsw_index_path']])))
       file.remove(file.path(directory_path, methods_reduce_dim[[reduction_method]][['hnsw_index_path']]))

    if(reduction_method == 'UMAP') {
      if(methods_reduce_dim[[reduction_method]][['has_model_index']]) {
        if(file.exists(file.path(directory_path, methods_reduce_dim[[reduction_method]][['umap_index_path']])))
           file.remove(file.path(directory_path, methods_reduce_dim[[reduction_method]][['umap_index_path']]))
      }
    }
  }

  #
  # Save cds object.
  # Notes:
  #   o  allow for HDF5Array assay objects.
  #
  if(!hdf5_assay_flag) {
    tryCatch(
      {
        saveRDS(cds, file.path(directory_path, rds_path))
      },
      error = function(cond) {
                       message('problem writing file \'', file.path(directory_path, rds_path), '\': ', cond, appendLF=appendLF)
                       return(NULL)
      },
      finally = {
        md5sum <- tools::md5sum(file.path(directory_path, rds_path))
        file_index[['files']] <- rbind(file_index[['files']],
                                       data.frame(cds_object = 'cds',
                                                  reduction_method = NA,
                                                  object_spec = object_name_to_string(cds),
                                                  file_format = 'rds',
                                                  file_path = rds_path,
                                                  file_md5sum = md5sum,
                                                  stringsAsFactors = FALSE))
      })
  } else {
    tryCatch(
      {
        HDF5Array::saveHDF5SummarizedExperiment(cds, file.path(directory_path, hdf5_path), replace=TRUE)
      },
      error = function(cond) {
                       message('problem writing file \'', file.path(directory_path, hdf5_path), '\': ', cond, appendLF=appendLF)
                       return(NULL)
      },
      finally = {
        md5sum <- tools::md5sum(file.path(directory_path, hdf5_path, 'se.rds'))
        file_index[['files']] <- rbind(file_index[['files']],
                                       data.frame(cds_object = 'cds',
                                                  reduction_method = NA,
                                                  object_spec = object_name_to_string(cds),
                                                  file_format = 'hdf5',
                                                  file_path = hdf5_path,
                                                  file_md5sum = md5sum,
                                                  stringsAsFactors = FALSE))
      })
  }

  # Save reduce_dimension annoy indexes.
  # Notes:
  #   o  save RDS files before the corresponding index files in
  #      order to enable loading.
  #
  for(reduction_method in names(methods_reduce_dim)) {
    if(methods_reduce_dim[[reduction_method]][['has_annoy_index']]) {
      tryCatch(
        {
          save_annoy_index(cds@reduce_dim_aux[[reduction_method]][['nn_index']][['annoy']][['nn_index']], file.path(directory_path, methods_reduce_dim[[reduction_method]][['annoy_index_path']]))
        },
        error = function(cond) {
                       message('problem writing file \'', file.path(directory_path, methods_reduce_dim[[reduction_method]][['annoy_index_path']]), '\': ', cond, appendLF=appendLF)
                       return(NULL)
        },
        finally = {
          md5sum <- tools::md5sum(file.path(directory_path, methods_reduce_dim[[reduction_method]][['annoy_index_path']]))
          file_index[['files']] <- rbind(file_index[['files']],
                                         data.frame(cds_object = 'reduce_dim_aux',
                                                    reduction_method = reduction_method,
                                                    object_spec = object_name_to_string(cds@reduce_dim_aux[[reduction_method]][['nn_index']][['annoy']][['nn_index']]),
                                                    file_format = 'annoy_index',
                                                    file_path = methods_reduce_dim[[reduction_method]][['annoy_index_path']],
                                                    file_md5sum = md5sum,
                                                    stringsAsFactors = FALSE))
        })
    }
    if(methods_reduce_dim[[reduction_method]][['has_hnsw_index']]) {
      tryCatch(
        {
          save_hnsw_index(cds@reduce_dim_aux[[reduction_method]][['nn_index']][['hnsw']][['nn_index']], file.path(directory_path, methods_reduce_dim[[reduction_method]][['hnsw_index_path']]))
        },
        error = function(cond) {
                       message('problem writing file \'', file.path(directory_path, methods_reduce_dim[[reduction_method]][['hswn_index_path']]), '\': ', cond, appendLF=appendLF)
                       return(NULL)
        },
        finally = {
          md5sum <- tools::md5sum(file.path(directory_path, methods_reduce_dim[[reduction_method]][['hnsw_index_path']]))
          file_index[['files']] <- rbind(file_index[['files']],
                                         data.frame(cds_object = 'reduce_dim_aux',
                                                    reduction_method = reduction_method,
                                                    object_spec = object_name_to_string(cds@reduce_dim_aux[[reduction_method]][['nn_index']][['hnsw']][['nn_index']]),
                                                    file_format = 'hnsw_index',
                                                    file_path = methods_reduce_dim[[reduction_method]][['hnsw_index_path']],
                                                    file_md5sum = md5sum,
                                                    stringsAsFactors = FALSE))
        })
    }
    if(reduction_method == 'UMAP' && methods_reduce_dim[[reduction_method]][['has_model_index']]) {
      tryCatch(
        {
          md5sum <- save_umap_nn_indexes(cds@reduce_dim_aux[[reduction_method]][['model']][['umap_model']], file.path(directory_path, methods_reduce_dim[[reduction_method]][['umap_index_path']]))
        },
        error = function(cond) {
                       message('problem writing file \'', file.path(directory_path, methods_reduce_dim[[reduction_method]][['umap_index_path']]), '\': ', cond, appendLF=appendLF)
                       return(NULL)
        },
        finally = {
          file_index[['files']] <- rbind(file_index[['files']],
                                         data.frame(cds_object = 'reduce_dim_aux',
                                                    reduction_method = reduction_method,
                                                    object_spec = object_name_to_string(cds@reduce_dim_aux[[reduction_method]][['model']][['umap_model']]),
                                                    file_format = 'umap_annoy_index',
                                                    file_path = methods_reduce_dim[[reduction_method]][['umap_index_path']],
                                                    file_md5sum = md5sum,
                                                    stringsAsFactors = FALSE))
        })
    }
  }

  # Save file_index.rds.
  saveRDS(file_index, file=file.path(directory_path, 'file_index.rds'))

  if(verbose) {
    report_files_saved(file_index)
  }
}


#
#' Load a full Monocle3 cell_data_set.
#'
#' Load a full Monocle3 cell_data_set, which was saved using
#' save_monocle_objects. For more information read the help
#' information for save_monocle_objects.
#'
#' @param directory_path a string giving the name of the directory
#'   from which to read the saved cell_data_set files.
#' @return a cell_data_set.
#'
#' @examples
#'   \dontrun{
#'     cds <- load_a549()
#'     save_monocle_objects(cds, 'mo')
#'     cds1 <- load_monocle_objects('mo')
#'   }
#'
#' @export
# Bioconductor forbids writing to user directories so examples
# is not run.
load_monocle_objects <- function(directory_path) {
  appendLF <- FALSE
  # Check for directory.
  if(!file.exists(directory_path))
    stop('Directory or file \'', directory_path, '\' does not exist.')

  # Check for file_index.rds. If none, assume that 'directory_path' is
  # an RDS file.
  file_index_path <- file.path(directory_path, 'file_index.rds')
  if(!file.exists(file_index_path)) {
    cds <- load_monocle_rds(directory_path)
    return(cds)
  }

  # Read file index.
  catch_error <- FALSE
  file_index <- tryCatch(
    {
      readRDS(file_index_path)
    },
    error = function(cond) {
              message('problem reading file \'', file_index_path, '\': ', cond, appendLF=appendLF);
              catch_error <<- TRUE
    },
    warning = function(cond) {
              message('problem reading file \'', file_index_path, '\': ', cond, appendLF=appendLF);
              catch_error <<- TRUE
    }
  )

  if(catch_error) {
    return(NULL)
  }

  # Check that this is a save_monocle_objects archive.
  if(file_index[['save_function']] != 'save_monocle_objects') {
    stop('The files in ', directory_path, ' are not from save_monocle_objects.')
  }

  # Write stored comment field.
  if(length(file_index[['comment']]) > 1) {
    message('File comment: ', file_index[['comment']], appendLF=appendLF)
  }

  # Loop through the files in file_index.rds in order
  # to restore objects.
  for(ifile in seq_along(file_index[['files']][['cds_object']])) {
    file_path <- file.path(directory_path, file_index[['files']][['file_path']][[ifile]])
    file_format <- file_index[['files']][['file_format']][[ifile]]
    cds_object <- file_index[['files']][['cds_object']][[ifile]]
    reduction_method <- file_index[['files']][['reduction_method']][[ifile]]
    md5sum <- file_index[['files']][['file_md5sum']][[ifile]]

    if(!file.exists(file_path)) 
      stop('load_monocle_objects: missing file \'', file_path, '\'')

    #
    # The functions load_umap_nn_indexes and
    # loadHDF5SummarizedExperiment check md5sums
    # internally so don't check here.
    #
    if(!(cds_object == 'reduce_dim_aux' &&
         reduction_method == 'UMAP' &&
         file_format == 'umap_nn_index' &&
         nchar(md5sum) > 32) &&
       file_format != 'hdf5') {
      md5sum_file <- tools::md5sum(file_path)
      if(is.na(md5sum_file) || (md5sum_file != md5sum)) {
        stop('md5sum mismatch for file \'', file_path, '\'')
      }
    }

    #
    # Note:
    #   o  expect that the RDS file for a reduction_method
    #      appears before index files for the reduction_method
    #
    if(cds_object == 'cds') {
      if(file_format == 'rds') {
        cds <- tryCatch(
          {
            readRDS(file_path)
          },
          error = function(cond) {
            message('problem reading file \'', file_path, '\'', appendLF=appendLF)
            return(NULL)
          })
      } else
      if(file_format == 'hdf5') {
        cds <- tryCatch(
          {
            HDF5Array::loadHDF5SummarizedExperiment(file_path)
          },
          error = function(cond) {
            message('problem reading file \'', file_path, '\'', appendLF=appendLF)
            return(NULL)
          })
      } else {
        stop('Unrecognized cds format value \'', file_format, '\'')
      }
    } else
    if(cds_object == 'reduce_dim_aux') {
      if(file_format == 'annoy_index') {
        cds@reduce_dim_aux[[reduction_method]][['nn_index']][['annoy']] <- update_annoy_index(cds@reduce_dim_aux[[reduction_method]][['nn_index']][['annoy']])

        metric <- cds@reduce_dim_aux[[reduction_method]][['nn_index']][['annoy']][['nn_index']][['metric']]
        ncolumn <- cds@reduce_dim_aux[[reduction_method]][['nn_index']][['annoy']][['nn_index']][['ncol']]
        cds@reduce_dim_aux[[reduction_method]][['nn_index']][['annoy']][['nn_index']] <- tryCatch(
          {
            load_annoy_index(cds@reduce_dim_aux[[reduction_method]][['nn_index']][['annoy']][['nn_index']], file_path, metric, ncolumn)
          },
          error = function(cond) {
            message('problem reading file \'', file_path, '\'', appendLF=appendLF)
            return(NULL)
          })
      } else
      if(file_format == 'hnsw_index') {
        cds@reduce_dim_aux[[reduction_method]][['nn_index']][['hnsw']] <- update_hnsw_index(cds@reduce_dim_aux[[reduction_method]][['nn_index']][['hnsw']])

        metric <- cds@reduce_dim_aux[[reduction_method]][['nn_index']][['hnsw']][['nn_index']][['metric']]
        ncolumn <- cds@reduce_dim_aux[[reduction_method]][['nn_index']][['hnsw']][['nn_index']][['ncol']]
        cds@reduce_dim_aux[[reduction_method]][['nn_index']][['hnsw']][['nn_index']] <- tryCatch(
          {
            load_hnsw_index(cds@reduce_dim_aux[[reduction_method]][['nn_index']][['hnsw']][['nn_index']], file_path, metric, ncolumn)
          },
          error = function(cond) {
            message('problem reading file \'', file_path, '\'', appendLF=appendLF)
            return(NULL)
          })
      } else
      if(reduction_method == 'UMAP' && file_format == 'umap_annoy_index') {
        cds@reduce_dim_aux[[reduction_method]][['model']][['umap_model']] <- tryCatch(
          {
            load_umap_nn_indexes(cds@reduce_dim_aux[[reduction_method]][['model']][['umap_model']], file_path, md5sum)
          },
          error = function(cond) {
            message('problem reading file \'', file_path, '\'', appendLF=appendLF)
            return(NULL)
         })
      } else {
        stop('Unrecognized file format value \'', file_format, '\'')
      }
      cds <- set_model_identity_path(cds, reduction_method, directory_path)
    } else {
      stop('Unrecognized cds_object value \'', cds_object, '\'')
    }
  }

  return(cds)
}


#
#  Operations from Monocle3 documentation
#   o  expression matrix
#   o  PCA: svd_v svd_sdev and gene_loadings and prop_var_expl
#   o  LSI: ?
#   o  Aligned: beta array and ?
#   o  cell clustering
#   o  marker genes
#   o  cell annotations
#   o  find_gene_modules
#   o  Garnett annotations?
#   o  learn_graph()
#   o  order_cells()
#   o  fit_models() DE regression analysis
#   o  graph_test() DE graph-autocorrelation analysis
#   o  Cicero annotations?
#
# Objects on loading (initial)
#   o  @ reduce_dim_aux
#   o  @ principal_graph_aux
#   o  @ principal_graph
#   o  @ clusters
#   o  @ int_elementMetadata
#   o  @ int_colData
#   o  @ int_metadata
#   o  @ rowRanges
#   o  @ colData
#   o  @ assays
#   o  @ NAMES
#   o  @ elementMetadata
#   o  @ metadata
#   o  $ preprocess_aux (vestigial now)
#
# Objects after analysis consisting of
#   cds <- preprocess_cds(cds, num_dim = 100)
#   cds <- align_cds(cds, num_dim = 100, alignment_group = "plate")
#   cds <- reduce_dimension(cds)
#   cds <- cluster_cells(cds, resolution=1e-5)
#   marker_test_res <- top_markers(cds, group_cells_by="partition", reference_cells=1000, cores=8)
#   colData(cds)$assigned_cell_type <- as.character(partitions(cds))
#   cds_subset <- choose_cells(cds)
#   pr_graph_test_res <- graph_test(cds_subset, neighbor_graph="knn", cores=8)
#   pr_deg_ids <- row.names(subset(pr_graph_test_res, morans_I > 0.01 & q_value < 0.05))
#   gene_module_df <- find_gene_modules(cds_subset[pr_deg_ids,], resolution=1e-3)
#   cds_subset <- cluster_cells(cds_subset, resolution=1e-2)
#
#   o  @ reduce_dim_aux
#   o  @ principal_graph_aux
#   o  @ principal_graph
#   o  @ clusters
#   o  @ int_elementMetadata
#   o  @ int_colData
#   o  @ int_metadata
#   o  @ rowRanges
#   o  @ colData
#   o  @ assays
#   o  @ NAMES
#   o  @ elementMetadata
#   o  @ metadata
#   o  $ preprocess_aux (empty)
#
# Objects after analysis of
#   cds <- load_monocle_rds('packer_embryo.load.rds')
#   cds <- preprocess_cds(cds, num_dim = 50)
#   cds <- align_cds(cds, alignment_group = "batch", residual_model_formula_str = "~ bg.300.loading + bg.400.loading + bg.500.1.loading + bg.500.2.loading + bg.r17.loading + bg.b01.loading + bg.b02.loading")
#   cds <- reduce_dimension(cds)
#   cds <- cluster_cells(cds)
#   cds <- learn_graph(cds)
#   cds <- order_cells(cds)
#   ciliated_genes <- c("che-1", "hlh-17", "nhr-6", "dmd-6", "ceh-36", "ham-1")
#   cds_subset <- cds[rowData(cds)$gene_short_name %in% ciliated_genes,]
#   gene_fits <- fit_models(cds_subset, model_formula_str = "~embryo.time")
#   subset_pr_test_res <- graph_test(cds_subset, neighbor_graph="principal_graph", cores=4)
#   pr_deg_ids <- row.names(subset(subset_pr_test_res, q_value < 0.05))
#
#   o  @ reduce_dim_aux
#   o  @ principal_graph_aux
#   o  @ principal_graph
#   o  @ clusters
#   o  @ int_elementMetadata
#   o  @ int_colData
#   o  @ int_metadata
#   o  @ rowRanges
#   o  @ colData
#   o  @ assays
#   o  @ NAMES
#   o  @ elementMetadata
#   o  @ metadata
#
# Strategy
#   o  copy slots
#        o  @ assays
#        o  @ colData
#        o  @ rowRanges
#        o  @ int_metadata ?
#        o  @ int_colData
#        o  @ int_elementMetadata
#        o  @ clusters
#        o  @ principal_graph
#        o  @ principal_graph_aux
#        o  @ reduce_dim_aux
#   o  transfer
#        o  @ preprocess_aux -> reduce_dim_aux (done)
#
# Issues
#   o  deal with missing values/objects; e.g., projection related and nearest neighbors
#        o  no annoy indices
#        o  no models initially; partial models after loading
#        o  no model identities
#        o  affected functions
#             o  projection functions
#             o  save/load models
#             o  save/load monocle objects?
#   o  gene_loadings to svd_v and svd_sdev conversion? (done)
#   o  the user may save a cds made using the recent Monocle3 version
#      and load it using load_monocle_objects. In that case, load the
#      cds using readRDS and return it unmodified. Use a model_version
#      check?
#   o  can there be an inconsistency between the model and matrix identity
#      version information?

load_monocle_rds <- function(file_path) {
  appendLF <- TRUE
  catch_error <- FALSE
  cds_tmp <- tryCatch(
                       {
                         readRDS(file_path)
                       },
                       error=function(cond) {
                         message('problem reading file \'', file_path, '\': ', cond, appendLF=appendLF);
                         catch_error <<- TRUE
                       },
                       warning=function(cond) {
                         message('problem reading file \'', file_path, '\': ', cond, appendLF=appendLF);
                         catch_error <<- TRUE
                       }
                     )

  if(catch_error) {
    return(NULL)
  }

  cds <- cds_tmp
  if(!is.null(SingleCellExperiment::reducedDims(cds_tmp)[['PCA']])) {
    if(is.null(cds_tmp@reduce_dim_aux[['PCA']][['model']][['identity']])) {
      cds <- initialize_reduce_dim_model_identity(cds, 'PCA')
    }
    if(!is.null(attr(cds, which='preprocess_aux')) && !is.null(cds_tmp@preprocess_aux[['gene_loadings']])) {
      gene_loadings <- cds_tmp@preprocess_aux[['gene_loadings']]
      svd_sdev <- apply(gene_loadings, 2, function(x) {sqrt(sum(x^2))})
      svd_v <- gene_loadings %*% diag(1.0/svd_sdev)
      colnames(svd_v) <- paste("PC", seq(1, ncol(svd_v)), sep="")
      cds@reduce_dim_aux[['PCA']][['model']][['svd_sdev']] <- svd_sdev
      cds@reduce_dim_aux[['PCA']][['model']][['svd_v']] <- svd_v
      cds@reduce_dim_aux[['PCA']][['model']][['prop_var_expl']] <- cds_tmp@preprocess_aux[['prop_var_expl']]
    }
  }

  if(!is.null(SingleCellExperiment::reducedDims(cds_tmp)[['LSI']])) {
    if(is.null(cds_tmp@reduce_dim_aux[['LSI']][['model']][['identity']])) {
      cds <- initialize_reduce_dim_model_identity(cds, 'LSI')
    }
    if(!is.null(attr(cds, which='preprocess_aux')) && !is.null(cds_tmp@preprocess_aux[['gene_loadings']])) {
      gene_loadings <- cds_tmp@preprocess_aux[['gene_loadings']]
      svd_sdev <- apply(gene_loadings, 2, function(x) {sqrt(sum(x^2))})
      cds@reduce_dim_aux[['LSI']][['model']][['svd_v']] <- gene_loadings %*% diag(1.0/svd_sdev)
      cds@reduce_dim_aux[['LSI']][['model']][['svd_sdev']] <- svd_sdev
    }
  }

  if(!is.null(SingleCellExperiment::reducedDims(cds_tmp)[['Aligned']])) {
    if(is.null(cds_tmp@reduce_dim_aux[['Aligned']][['model']][['identity']])) {
      cds <- initialize_reduce_dim_model_identity(cds, 'Aligned')
    }
    if(!is.null(attr(cds, which='preprocess_aux')) && !is.null(cds_tmp@preprocess_aux$beta)) {
      cds@reduce_dim_aux[['Aligned']][['model']][['beta']] <- cds_tmp@preprocess_aux$beta
    }
  }

  if(!is.null(SingleCellExperiment::reducedDims(cds_tmp)[['tSNE']])) {
    if(is.null(cds_tmp@reduce_dim_aux[['tSNE']][['model']][['identity']])) {
      cds <- initialize_reduce_dim_model_identity(cds, 'tSNE')
    }
  }

  if(!is.null(SingleCellExperiment::reducedDims(cds_tmp)[['UMAP']])) {
    if(is.null(cds_tmp@reduce_dim_aux[['UMAP']][['model']][['identity']])) {
      cds <- initialize_reduce_dim_model_identity(cds, 'UMAP')
    }
  }

  if(is.null(SingleCellExperiment::int_metadata(cds_tmp)[['counts_metadata']])) {
    cds <- initialize_counts_metadata(cds)
  }

  # The RDS file may have a preprocess_aux slot, which doesn't
  # exist in the current cell_data_set class. The attr command
  # appears to remove it whereas setting cds@preprocess_cds
  # and cds$preprocess_cds to NULL do not.
  if(!is.null(attr(cds, which='preprocess_aux'))) {
    attr(cds, which='preprocess_aux') <- NULL
  }

  rm(cds_tmp)

  return(cds)
}
cole-trapnell-lab/monocle3 documentation built on April 7, 2024, 9:24 p.m.