R/dimension_reduction.R

Defines functions runGiottoHarmony runtSNE runUMAP signPCA jackstrawPlot create_jackstrawplot screePlot create_screeplot runPCA create_feats_to_use_matrix runPCA_BiocSingular runPCA_factominer runPCA_prcomp_irlba pca_giotto

Documented in create_feats_to_use_matrix create_jackstrawplot create_screeplot jackstrawPlot pca_giotto runGiottoHarmony runPCA runPCA_BiocSingular runPCA_factominer runPCA_prcomp_irlba runtSNE runUMAP screePlot signPCA

# * Dimension Reduction Object Creation #
# ! Moved to classes.R



## * PCA  ####
# ---------- #

#' @title pca_giotto
#' @name pca_giotto
#' @description performs PCA based on Rfast
#' @param mymatrix matrix or object that can be converted to matrix
#' @param center center data
#' @param scale scale features
#' @param k number of principal components to calculate
#' @keywords internal
#' @return list of eigenvalues, eigenvectors and pca coordinates
pca_giotto = function(mymatrix, center = T, scale = T, k = 50) {

  if(!is.null(k) & k > ncol(mymatrix)) {
    warning('k > ncol(matrix), will be set to ncol(matrix)')
    k = ncol(mymatrix)
  }

  if(!is.matrix(mymatrix)) mymatrix = as.matrix(mymatrix)
  my_t_matrix = t_flex(mymatrix)
  pca_f = Rfast::hd.eigen(x = my_t_matrix, center = center, scale = scale, k = k, vectors = TRUE)

  # calculate pca coordinates
  rotated_mat = standardise_flex(x = my_t_matrix, center = center, scale = scale)
  coords = rotated_mat %*% pca_f$vectors
  colnames(coords) = paste0('Dim.', 1:ncol(coords))

  return(list(eigenvalues = pca_f$values, eigenvectors = pca_f$vectors, coords = coords))

}


#' @title runPCA_prcomp_irlba
#' @name runPCA_prcomp_irlba
#' @description performs PCA based on the irlba package
#' @param x matrix or object that can be converted to matrix
#' @param ncp number of principal components to calculate
#' @param center center data
#' @param scale scale features
#' @param rev reverse PCA
#' @param set_seed use of seed
#' @param seed_number seed number to use
#' @keywords internal
#' @return list of eigenvalues, loadings and pca coordinates
runPCA_prcomp_irlba = function(x,
                               ncp = 100,
                               center = TRUE,
                               scale = TRUE,
                               rev = FALSE,
                               set_seed = TRUE,
                               seed_number = 1234,
                               ...) {

  min_ncp = min(dim(x))

  if(ncp >= min_ncp) {
    warning("ncp >= minimum dimension of x, will be set to minimum dimension of x - 1")
    ncp = min_ncp-1
  }

  if(isTRUE(rev)) {

    x = t_flex(x)

    # start seed
    if(isTRUE(set_seed)) {
      set.seed(seed = seed_number)
    }

    pca_res = irlba::prcomp_irlba(x = x, n = ncp, center = center, scale. = scale, ...)

    # exit seed
    if(isTRUE(set_seed)) {
      set.seed(seed = Sys.time())
    }

    # eigenvalues
    eigenvalues = pca_res$sdev^2
    # PC loading
    loadings = pca_res$x
    rownames(loadings) = rownames(x)
    colnames(loadings) = paste0('Dim.', 1:ncol(loadings))
    # coordinates
    coords = pca_res$rotation
    rownames(coords) = colnames(x)
    colnames(coords) = paste0('Dim.', 1:ncol(coords))
    result = list(eigenvalues = eigenvalues, loadings = loadings, coords = coords)

  } else {

    # start seed
    if(isTRUE(set_seed)) {
      set.seed(seed = seed_number)
    }
    pca_res = irlba::prcomp_irlba(x = x, n = ncp, center = center, scale. = scale, ...)

    # exit seed
    if(isTRUE(set_seed)) {
      set.seed(seed = Sys.time())
    }

    # eigenvalues
    eigenvalues = pca_res$sdev^2
    # PC loading
    loadings = pca_res$rotation
    rownames(loadings) = colnames(x)
    colnames(loadings) = paste0('Dim.', 1:ncol(loadings))
    # coordinates
    coords = pca_res$x
    rownames(coords) = rownames(x)
    colnames(coords) = paste0('Dim.', 1:ncol(coords))
    result = list(eigenvalues = eigenvalues, loadings = loadings, coords = coords)

  }

  return(result)

}



#' @title runPCA_factominer
#' @name runPCA_factominer
#' @description performs PCA based on the factominer package
#' @param x matrix or object that can be converted to matrix
#' @param ncp number of principal components to calculate
#' @param scale scale features
#' @param rev reverse PCA
#' @param set_seed use of seed
#' @param seed_number seed number to use
#' @keywords internal
#' @return list of eigenvalues, loadings and pca coordinates
runPCA_factominer = function(x,
                             ncp = 100,
                             scale = TRUE,
                             rev = FALSE,
                             set_seed = TRUE,
                             seed_number = 1234,
                             ...) {

  # verify if optional package is installed
  package_check(pkg_name = "FactoMineR", repository = "CRAN")

  if(!methods::is(x, 'matrix')) {
    x = as.matrix(x)
  }

  if(isTRUE(rev)) {

    x = t_flex(x)

    if(ncp > nrow(x)) {
      warning("ncp > nrow(x), will be set to nrow(x)")
      ncp = nrow(x)
    }

    # start seed
    if(isTRUE(set_seed)) {
      set.seed(seed = seed_number)
    }

    pca_res = FactoMineR::PCA(X = x, ncp = ncp, scale.unit = scale, graph = F, ...)

    # exit seed
    if(isTRUE(set_seed)) {
      set.seed(seed = Sys.time())
    }

    # eigenvalues
    eigenvalues = pca_res$eig[,1]

    # PC loading
    loadings = pca_res$ind$coord
    rownames(loadings) = rownames(x)
    colnames(loadings) = paste0('Dim.', 1:ncol(loadings))

    # coordinates
    coords = sweep(pca_res$var$coord, 2, sqrt(eigenvalues[1:ncp]), FUN = "/")
    rownames(coords) = colnames(x)
    colnames(coords) = paste0('Dim.', 1:ncol(coords))

    result = list(eigenvalues = eigenvalues, loadings = loadings, coords = coords)

  } else {

    if(ncp > ncol(x)) {
      warning("ncp > ncol(x), will be set to ncol(x)")
      ncp = ncol(x)
    }

    # start seed
    if(isTRUE(set_seed)) {
      set.seed(seed = seed_number)
    }

    pca_res = FactoMineR::PCA(X = x, ncp = ncp, scale.unit = scale, graph = F, ...)

    # exit seed
    if(isTRUE(set_seed)) {
      set.seed(seed = Sys.time())
    }

    # eigenvalues
    eigenvalues = pca_res$eig[,1]

    # PC loading
    loadings = sweep(pca_res$var$coord, 2, sqrt(eigenvalues[1:ncp]), FUN = "/")
    rownames(loadings) = colnames(x)
    colnames(loadings) = paste0('Dim.', 1:ncol(loadings))

    # coordinates
    coords = pca_res$ind$coord
    rownames(coords) = rownames(x)
    colnames(coords) = paste0('Dim.', 1:ncol(coords))

    result = list(eigenvalues = eigenvalues, loadings = loadings, coords = coords)

  }

  return(result)

}


#' @title runPCA_BiocSingular
#' @name runPCA_BiocSingular
#' @description Performs PCA based on the biocSingular package
#' @param x matrix or object that can be converted to matrix
#' @param ncp number of principal components to calculate
#' @param center center the matrix before pca
#' @param scale scale features
#' @param rev reverse PCA
#' @param set_seed use of seed
#' @param seed_number seed number to use
#' @param BSPARAM method to use
#' @param BSParameters additonal parameters for method
#' @keywords internal
#' @return list of eigenvalues, loadings and pca coordinates
runPCA_BiocSingular = function(x,
                               ncp = 100,
                               center = TRUE,
                               scale = TRUE,
                               rev = FALSE,
                               set_seed = TRUE,
                               seed_number = 1234,
                               BSPARAM = c('irlba', 'exact', 'random'),
                               BSParameters = list(NA),
                               ...) {

  BSPARAM = match.arg(BSPARAM, choices = c('irlba', 'exact', 'random'))

  min_ncp = min(dim(x))

  if(ncp >= min_ncp) {
    warning("ncp >= minimum dimension of x, will be set to minimum dimension of x - 1")
    ncp = min_ncp-1
  }

  # start seed
  if(isTRUE(set_seed)) {
    set.seed(seed = seed_number)
  }

  if(isTRUE(rev)) {

    x = t_flex(x)

    if(BSPARAM == 'irlba') {
      pca_res = BiocSingular::runPCA(x = x, rank = ncp,
                                     center = center, scale = scale,
                                     BSPARAM = BiocSingular::IrlbaParam(BSParameters),
                                     ...)
    } else if(BSPARAM == 'exact') {
      pca_res = BiocSingular::runPCA(x = x, rank = ncp,
                                     center = center, scale = scale,
                                     BSPARAM = BiocSingular::ExactParam(BSParameters),
                                     ...)
    } else if(BSPARAM == 'random') {
      pca_res = BiocSingular::runPCA(x = x, rank = ncp,
                                     center = center, scale = scale,
                                     BSPARAM = BiocSingular::RandomParam(BSParameters),
                                     ...)
    }



    # eigenvalues
    eigenvalues = pca_res$sdev^2
    # PC loading
    loadings = pca_res$x
    rownames(loadings) = rownames(x)
    colnames(loadings) = paste0('Dim.', 1:ncol(loadings))
    # coordinates
    coords = pca_res$rotation
    rownames(coords) = colnames(x)
    colnames(coords) = paste0('Dim.', 1:ncol(coords))
    result = list(eigenvalues = eigenvalues, loadings = loadings, coords = coords)

  } else {

    if(BSPARAM == 'irlba') {
      pca_res = BiocSingular::runPCA(x = x, rank = ncp,
                                     center = center, scale = scale,
                                     BSPARAM = BiocSingular::IrlbaParam(BSParameters),
                                     ...)
    } else if(BSPARAM == 'exact') {
      pca_res = BiocSingular::runPCA(x = x, rank = ncp,
                                     center = center, scale = scale,
                                     BSPARAM = BiocSingular::ExactParam(BSParameters),
                                     ...)
    } else if(BSPARAM == 'random') {
      pca_res = BiocSingular::runPCA(x = x, rank = ncp,
                                     center = center, scale = scale,
                                     BSPARAM = BiocSingular::RandomParam(BSParameters),
                                     ...)
    }

    # eigenvalues
    eigenvalues = pca_res$sdev^2
    # PC loading
    loadings = pca_res$rotation
    rownames(loadings) = colnames(x)
    colnames(loadings) = paste0('Dim.', 1:ncol(loadings))
    # coordinates
    coords = pca_res$x
    rownames(coords) = rownames(x)
    colnames(coords) = paste0('Dim.', 1:ncol(coords))
    result = list(eigenvalues = eigenvalues, loadings = loadings, coords = coords)

  }

  # exit seed
  if(isTRUE(set_seed)) {
    set.seed(seed = Sys.time())
  }

  return(result)

}






#' @title create_genes_to_use_matrix
#' @name create_genes_to_use_matrix
#' @description subsets matrix based on vector of genes or hvf column
#' @param gobject giotto object
#' @param feat_type feature type
#' @param spat_unit spatial unit
#' @param sel_matrix selected expression matrix
#' @param feats_to_use feats to use, character or vector of features
#' @param verbose verbosity
#' @keywords internal
#' @return subsetted matrix based on selected genes
create_feats_to_use_matrix = function(gobject,
                                      feat_type = NULL,
                                      spat_unit = NULL,
                                      sel_matrix,
                                      feats_to_use,
                                      verbose = FALSE) {


  # Set feat_type and spat_unit
  spat_unit = set_default_spat_unit(gobject = gobject,
                                    spat_unit = spat_unit)
  feat_type = set_default_feat_type(gobject = gobject,
                                    spat_unit = spat_unit,
                                    feat_type = feat_type)

  # cell metadata
  feat_metadata = fDataDT(gobject,
                          spat_unit = spat_unit,
                          feat_type = feat_type)

  # for hvf features
  if(is.character(feats_to_use) & length(feats_to_use) == 1) {
    if(feats_to_use %in% colnames(feat_metadata)) {
      if(verbose == TRUE) {
          wrap_msg('"', feats_to_use, '" was found in the feats metadata information and will be used to select highly variable features',
                   sep = '')
        }
      feats_to_use = feat_metadata[get(feats_to_use) == 'yes'][['feat_ID']]
      sel_matrix = sel_matrix[rownames(sel_matrix) %in% feats_to_use, ]
    } else {
      if(verbose == TRUE) wrap_msg('"', feats_to_use, '" was not found in the gene metadata information, all genes will be used',
                                   sep = '')
    }
  } else {
    if(verbose == TRUE) wrap_msg('a custom vector of genes will be used to subset the matrix')
    sel_matrix = sel_matrix[rownames(sel_matrix) %in% feats_to_use, ]
  }

  if(verbose == TRUE) cat('class of selected matrix: ', class(sel_matrix), '\n')
  return(sel_matrix)

}



#' @title runPCA
#' @name runPCA
#' @description runs a Principal Component Analysis
#' @param gobject giotto object
#' @param spat_unit spatial unit
#' @param feat_type feature type
#' @param expression_values expression values to use
#' @param reduction cells or genes
#' @param name arbitrary name for PCA run
#' @param feats_to_use subset of features to use for PCA
#' @param genes_to_use deprecated use feats_to_use
#' @param return_gobject boolean: return giotto object (default = TRUE)
#' @param center center data first (default = TRUE)
#' @param scale_unit scale features before PCA (default = TRUE)
#' @param ncp number of principal components to calculate
#' @param method which implementation to use
#' @param method_params additional parameters
#' @param rev do a reverse PCA
#' @param set_seed use of seed
#' @param seed_number seed number to use
#' @param verbose verbosity of the function
#' @param ... additional parameters for PCA (see details)
#' @return giotto object with updated PCA dimension recuction
#' @details See \code{\link[BiocSingular]{runPCA}} and \code{\link[FactoMineR]{PCA}} for more information about other parameters.
#' \itemize{
#'   \item feats_to_use = NULL: will use all features from the selected matrix
#'   \item feats_to_use = <hvg name>: can be used to select a column name of
#'   highly variable features, created by (see \code{\link{calculateHVF}})
#'   \item feats_to_use = c('geneA', 'geneB', ...): will use all manually provided features
#' }
#' @export
runPCA <- function(gobject,
                   spat_unit = NULL,
                   feat_type = NULL,
                   expression_values = c('normalized', 'scaled', 'custom'),
                   reduction = c('cells', 'feats'),
                   name = NULL,
                   feats_to_use = 'hvf',
                   genes_to_use = NULL,
                   return_gobject = TRUE,
                   center = TRUE,
                   scale_unit = TRUE,
                   ncp = 100,
                   method = c('irlba', 'exact', 'random','factominer'),
                   method_params = list(NA),
                   rev = FALSE,
                   set_seed = TRUE,
                   seed_number = 1234,
                   verbose = TRUE,
                   ...) {


  # Set feat_type and spat_unit
  spat_unit = set_default_spat_unit(gobject = gobject,
                                    spat_unit = spat_unit)
  feat_type = set_default_feat_type(gobject = gobject,
                                    spat_unit = spat_unit,
                                    feat_type = feat_type)

  # specify name to use for pca
  if(is.null(name)) {
    if(feat_type == 'rna') {
      name = 'pca'
    } else {
      name = paste0(feat_type,'.','pca')
    }
  }

  ## deprecated arguments
  if(!is.null(genes_to_use)) {
    feats_to_use = genes_to_use
    warning('genes_to_use is deprecated, use feats_to_use in the future \n')
  }

  # expression values to be used
  values = match.arg(expression_values, unique(c('normalized', 'scaled', 'custom', expression_values)))
  expr_values = get_expression_values(gobject = gobject,
                                      feat_type = feat_type,
                                      spat_unit = spat_unit,
                                      values = values,
                                      output = 'exprObj')
  provenance = prov(expr_values)
  expr_values = expr_values[] # extract matrix

  ## subset matrix
  if(!is.null(feats_to_use)) {
    expr_values = create_feats_to_use_matrix(gobject = gobject,
                                             spat_unit = spat_unit,
                                             feat_type = feat_type,
                                             sel_matrix = expr_values,
                                             feats_to_use = feats_to_use,
                                             verbose = verbose)
  }


  # do PCA dimension reduction
  reduction = match.arg(reduction, c('cells', 'feats'))

  # PCA implementation
  method = match.arg(method, c('irlba', 'exact', 'random','factominer'))

  if(reduction == 'cells') {
    # PCA on cells
    if(method %in% c('irlba', 'exact', 'random')) {
      pca_object = runPCA_BiocSingular(x = t_flex(expr_values),
                                       center = center,
                                       scale = scale_unit,
                                       ncp = ncp,
                                       rev = rev,
                                       set_seed = set_seed,
                                       seed_number = seed_number,
                                       BSPARAM = method,
                                       BSParameters = method_params,
                                       ...)
    } else if(method == 'factominer') {
      pca_object = runPCA_factominer(x = t_flex(expr_values),
                                     scale = scale_unit,
                                     ncp = ncp, rev = rev,
                                     set_seed = set_seed,
                                     seed_number = seed_number,
                                     ...)
    } else {
      stop('only PCA methods from the BiocSingular and factominer package have been implemented \n')
    }

  } else {
    # PCA on genes
    if(method %in% c('irlba', 'exact', 'random')) {
      pca_object = runPCA_BiocSingular(x = expr_values,
                                       center = center,
                                       scale = scale_unit,
                                       ncp = ncp,
                                       rev = rev,
                                       set_seed = set_seed,
                                       seed_number = seed_number,
                                       BSPARAM = method,
                                       BSParameters = method_params,
                                       ...)

    } else if(method == 'factominer') {
      pca_object = runPCA_factominer(x = expr_values,
                                              scale = scale_unit, ncp = ncp, rev = rev,
                                              set_seed = set_seed, seed_number = seed_number, ...)
    } else {
      stop('only PCA methods from the irlba and factominer package have been implemented \n')
    }

  }


  if(return_gobject == TRUE) {

    pca_names = list_dim_reductions_names(gobject = gobject,
                                          data_type = reduction,
                                          spat_unit = spat_unit,
                                          feat_type = feat_type,
                                          dim_type = 'pca')

    if(name %in% pca_names) {
      cat('\n ', name, ' has already been used, will be overwritten \n')
    }

    if (reduction == "cells") {
      my_row_names = colnames(expr_values)
    } else {
      my_row_names = rownames(expr_values)
    }

    dimObject = create_dim_obj(name = name,
                               feat_type = feat_type,
                               spat_unit = spat_unit,
                               provenance = provenance,
                               reduction = reduction,
                               reduction_method = 'pca',
                               coordinates = pca_object$coords,
                               misc = list(eigenvalues = pca_object$eigenvalues,
                                           loadings = pca_object$loadings),
                               my_rownames = my_row_names)

    ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
    gobject = set_dimReduction(gobject = gobject, dimObject = dimObject)
    ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###


    ## update parameters used ##
    gobject = update_giotto_params(gobject, description = '_pca')
    return(gobject)


  } else {
    return(pca_object)
  }
}


## * PC estimates ####
# ------------------ #

#' @title create_screeplot
#' @name create_screeplot
#' @description create screeplot with ggplot
#' @param pca_obj pca dimension reduction object
#' @param ncp number of principal components to calculate
#' @param ylim y-axis limits on scree plot
#' @return ggplot
#' @keywords internal
create_screeplot = function(pca_obj, ncp = 20, ylim = c(0, 20)) {


  # data.table: set global variable
  PC = NULL

  eigs = slot(pca_obj, 'misc')$eigenvalues

  # variance explained
  var_expl = eigs/sum(eigs)*100
  var_expl_cum = cumsum(eigs)/sum(eigs)*100

  # create data.table
  screeDT = data.table::data.table('PC' = paste0('PC.', 1:length(var_expl)),
                                   'var_expl' = var_expl,
                                   'var_expl_cum' = var_expl_cum)
  screeDT[, PC := factor(PC, levels = PC)]

  max_ncp = length(eigs)
  ncp = ifelse(ncp > max_ncp, max_ncp, ncp)

  pl = ggplot2::ggplot()
  pl = pl + ggplot2::theme_bw()
  pl = pl + ggplot2::geom_bar(data = screeDT[1:ncp], ggplot2::aes(x = PC, y = var_expl), stat = 'identity')
  pl = pl + ggplot2::coord_cartesian(ylim = ylim)
  pl = pl + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1, vjust = 1))
  pl = pl + ggplot2::labs(x = '', y = '% of variance explained per PC')

  cpl = ggplot2::ggplot()
  cpl = cpl + ggplot2::theme_bw()
  cpl = cpl + ggplot2::geom_bar(data = screeDT[1:ncp], ggplot2::aes(x = PC, y = var_expl_cum), stat = 'identity')
  cpl = cpl + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1, vjust = 1))
  cpl = cpl + ggplot2::labs(x = '', y = 'cumulative % of variance explained')

  savelist = list(pl, cpl)

  ## combine plots with cowplot
  combo_plot <- cowplot::plot_grid(plotlist = savelist,
                                   ncol = 1,
                                   rel_heights = c(1),
                                   rel_widths = c(1),
                                   align = 'v')

  return(combo_plot)

}



#' @title screePlot
#' @name screePlot
#' @description identify significant principal components (PCs) using an screeplot (a.k.a. elbowplot)
#' @param gobject giotto object
#' @param spat_unit spatial unit
#' @param feat_type feature type
#' @param name name of PCA object if available
#' @param expression_values expression values to use
#' @param reduction cells or features
#' @param method which implementation to use
#' @param rev do a reverse PCA
#' @param feats_to_use subset of features to use for PCA
#' @param genes_to_use deprecated, use feats_to_use
#' @param center center data before PCA
#' @param scale_unit scale features before PCA
#' @param ncp number of principal components to calculate
#' @param ylim y-axis limits on scree plot
#' @param verbose verobsity
#' @param show_plot show plot
#' @param return_plot return ggplot object
#' @param save_plot directly save the plot [boolean]
#' @param save_param list of saving parameters from all_plots_save_function()
#' @param default_save_name default save name for saving, don't change, change save_name in save_param
#' @param ... additional arguments to pca function, see \code{\link{runPCA}}
#' @return ggplot object for scree method
#' @details
#'  Screeplot works by plotting the explained variance of each
#'  individual PC in a barplot allowing you to identify which PC provides a significant
#'  contribution (a.k.a 'elbow method'). \cr
#'  Screeplot will use an available pca object, based on the parameter 'name', or it will
#'  create it if it's not available (see \code{\link{runPCA}})
#' @export
screePlot = function(gobject,
                     spat_unit = NULL,
                     feat_type = NULL,
                     name = NULL,
                     expression_values = c('normalized', 'scaled', 'custom'),
                     reduction = c('cells', 'feats'),
                     method = c('irlba', 'exact', 'random','factominer'),
                     rev = FALSE,
                     feats_to_use = NULL,
                     genes_to_use = NULL,
                     center = F,
                     scale_unit = F,
                     ncp = 100,
                     ylim = c(0, 20),
                     verbose = T,
                     show_plot = NA,
                     return_plot = NA,
                     save_plot = NA,
                     save_param = list(),
                     default_save_name = 'screePlot',
                     ...) {


  # Set feat_type and spat_unit
  spat_unit = set_default_spat_unit(gobject = gobject,
                                    spat_unit = spat_unit)
  feat_type = set_default_feat_type(gobject = gobject,
                                    spat_unit = spat_unit,
                                    feat_type = feat_type)

  # specify name to use for screeplot
  if(is.null(name)) {
    if(feat_type == 'rna') {
      name = 'pca'
    } else {
      name = paste0(feat_type,'.','pca')
    }
  }

  ## deprecated arguments
  if(!is.null(genes_to_use)) {
    feats_to_use = genes_to_use
    warning('genes_to_use is deprecated, use feats_to_use in the future \n')
  }

  # select direction of reduction
  reduction = match.arg(reduction, c('cells', 'feats'))
  pca_obj = get_dimReduction(gobject = gobject,
                             spat_unit = spat_unit,
                             feat_type = feat_type,
                             reduction = reduction,
                             reduction_method = 'pca',
                             name = name,
                             output = 'dimObj')

  #gobject@dimension_reduction[[reduction]][[spat_unit]]$pca[[name]]

  # print, return and save parameters
  show_plot = ifelse(is.na(show_plot), readGiottoInstructions(gobject, param = 'show_plot'), show_plot)
  save_plot = ifelse(is.na(save_plot), readGiottoInstructions(gobject, param = 'save_plot'), save_plot)
  return_plot = ifelse(is.na(return_plot), readGiottoInstructions(gobject, param = 'return_plot'), return_plot)


  # if pca already exists plot
  if(!is.null(pca_obj)) {
    if(verbose == TRUE) cat('PCA with name: ', name, ' already exists and will be used for the screeplot \n')

    screeplot = create_screeplot(pca_obj = pca_obj, ncp = ncp, ylim = ylim)

  } else {
    # if pca doesn't exists, then create pca and then plot
    if(verbose == TRUE) cat('PCA with name: ', name, ' does NOT exists, PCA will be done first \n')

    # expression values to be used
    values = match.arg(expression_values, unique(c('normalized', 'scaled', 'custom', expression_values)))
    expr_values = get_expression_values(gobject = gobject,
                                        spat_unit = spat_unit,
                                        feat_type = feat_type,
                                        values = values,
                                        output = 'exprObj')

    provenance = prov(expr_values)
    expr_values = expr_values[] # extract matrix

    # PCA implementation
    method = match.arg(method, c('irlba', 'exact', 'random','factominer'))

    ## subset matrix
    if(!is.null(feats_to_use)) {
      expr_values = create_feats_to_use_matrix(gobject = gobject,
                                               spat_unit = spat_unit,
                                               feat_type = feat_type,
                                               sel_matrix = expr_values,
                                               feats_to_use = feats_to_use,
                                               verbose = verbose)
    }

    # reduction of cells
    if(reduction == 'cells') {

      # PCA on cells
      if(method == 'irlba') {
        pca_object = runPCA_prcomp_irlba(x = t_flex(expr_values), center = center, scale = scale_unit, ncp = ncp, rev = rev, ...)
      } else if(method == 'factominer') {
        pca_object = runPCA_factominer(x = t_flex(expr_values), scale = scale_unit, ncp = ncp, rev = rev, ...)
      } else {
        stop('only PCA methods from the irlba and factominer package have been implemented \n')
      }

      dimObject = create_dim_obj(name = name,
                                 feat_type = feat_type,
                                 spat_unit = spat_unit,
                                 provenance = provenance,
                                 reduction = reduction,
                                 reduction_method = 'pca',
                                 coordinates = pca_object$coords,
                                 misc = list(eigenvalues = pca_object$eigenvalues,
                                             loadings = pca_object$loadings),
                                 my_rownames = colnames(expr_values))

      screeplot = create_screeplot(pca_obj = dimObject, ncp = ncp, ylim = ylim)
    }

  }

  ## print plot
  if(show_plot == TRUE) {
    print(screeplot)
  }

  ## save plot
  if(save_plot == TRUE) {
    do.call('all_plots_save_function', c(list(gobject = gobject, plot_object = screeplot, default_save_name = default_save_name), save_param))
  }

  ## return plot
  if(return_plot == TRUE) {
    return(screeplot)
  }
}


#' @title create_jackstrawplot
#' @name create_jackstrawplot
#' @description create jackstrawplot with ggplot
#' @param jackstraw_data result from jackstraw function
#' @param ncp number of principal components to calculate
#' @param ylim y-axis limits on jackstraw plot
#' @param threshold p.value threshold to call a PC significant
#' @keywords internal
#' @return ggplot
create_jackstrawplot = function(jackstraw_data,
                                ncp = 20,
                                ylim = c(0, 1),
                                threshold = 0.01) {

  # data.table variables
  PC = p.val = NULL

  testDT = data.table(PC = paste0('PC.', 1:length(jackstraw_data)),
                      p.val = jackstraw_data)
  testDT[, PC := factor(PC, levels = PC)]
  testDT[, sign := ifelse(p.val <= threshold, 'sign', 'n.s.')]

  pl = ggplot2::ggplot()
  pl = pl + ggplot2::theme_bw()
  pl = pl + ggplot2::geom_point(data = testDT[1:ncp], ggplot2::aes(x = PC, y = p.val, fill = sign), shape = 21)
  pl = pl + ggplot2::scale_fill_manual(values  = c('n.s.' = 'lightgrey', 'sign' = 'darkorange'))
  pl = pl + ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 45, hjust = 1, vjust = 1))
  pl = pl + ggplot2::coord_cartesian(ylim = ylim)
  pl = pl + ggplot2::theme(panel.grid.major.x = ggplot2::element_blank())
  pl = pl + ggplot2::labs(x = '', y = 'p-value per PC')

  return(pl)

}


#' @title jackstrawPlot
#' @name jackstrawPlot
#' @description identify significant prinicipal components (PCs)
#' @param gobject giotto object
#' @param feat_type feature type
#' @param spat_unit spatial unit
#' @param expression_values expression values to use
#' @param reduction cells or genes
#' @param feats_to_use subset of features to use for PCA
#' @param genes_to_use deprecated, use feats_to_use
#' @param center center data before PCA
#' @param scale_unit scale features before PCA
#' @param ncp number of principal components to calculate
#' @param ylim y-axis limits on jackstraw plot
#' @param iter number of interations for jackstraw
#' @param threshold p-value threshold to call a PC significant
#' @param verbose show progress of jackstraw method
#' @param show_plot show plot
#' @param return_plot return ggplot object
#' @param save_plot directly save the plot [boolean]
#' @param save_param list of saving parameters from all_plots_save_function()
#' @param default_save_name default save name for saving, don't change, change save_name in save_param
#' @return ggplot object for jackstraw method
#' @details
#'  The Jackstraw method uses the \code{\link[jackstraw]{permutationPA}} function. By
#'  systematically permuting genes it identifies robust, and thus significant, PCs.
#'  \cr
#' @export
jackstrawPlot = function(gobject,
                         spat_unit = NULL,
                         feat_type = NULL,
                         expression_values = c('normalized', 'scaled', 'custom'),
                         reduction = c('cells', 'feats'),
                         feats_to_use = NULL,
                         genes_to_use = NULL,
                         center = FALSE,
                         scale_unit = FALSE,
                         ncp = 20,
                         ylim = c(0, 1),
                         iter = 10,
                         threshold = 0.01,
                         verbose = TRUE,
                         show_plot = NA,
                         return_plot = NA,
                         save_plot = NA,
                         save_param = list(),
                         default_save_name = 'jackstrawPlot') {


  # Set feat_type and spat_unit
  spat_unit = set_default_spat_unit(gobject = gobject,
                                    spat_unit = spat_unit)
  feat_type = set_default_feat_type(gobject = gobject,
                                    spat_unit = spat_unit,
                                    feat_type = feat_type)

  ## deprecated arguments
  if(!is.null(genes_to_use)) {
    feats_to_use = genes_to_use
    warning('genes_to_use is deprecated, use feats_to_use in the future \n')
  }

  package_check(pkg_name = "jackstraw", repository = "CRAN")

  # print message with information #
  if(verbose) message("using 'jackstraw' to identify significant PCs If used in published research, please cite:
  Neo Christopher Chung and John D. Storey (2014).
  'Statistical significance of variables driving systematic variation in high-dimensional data. Bioinformatics")

  # select direction of reduction
  reduction = match.arg(reduction, c('cells', 'feats'))

  # print, return and save parameters
  show_plot = ifelse(is.na(show_plot), readGiottoInstructions(gobject, param = 'show_plot'), show_plot)
  save_plot = ifelse(is.na(save_plot), readGiottoInstructions(gobject, param = 'save_plot'), save_plot)
  return_plot = ifelse(is.na(return_plot), readGiottoInstructions(gobject, param = 'return_plot'), return_plot)

  # expression values to be used
  values = match.arg(expression_values, unique(c('normalized', 'scaled', 'custom', expression_values)))
  expr_values = get_expression_values(gobject = gobject,
                                      spat_unit = spat_unit,
                                      feat_type = feat_type,
                                      values = values,
                                      output = 'matrix')


  ## subset matrix
  if(!is.null(feats_to_use)) {
    expr_values = create_feats_to_use_matrix(gobject = gobject,
                                             spat_unit = spat_unit,
                                             feat_type = feat_type,
                                             sel_matrix = expr_values,
                                             feats_to_use = feats_to_use,
                                             verbose = verbose)
  }

  # reduction of cells
  if(reduction == 'cells') {

    if(scale_unit == TRUE | center == TRUE) {
      expr_values = t_flex(scale(t_flex(expr_values), center = center, scale = scale_unit))
    }

    jtest = jackstraw::permutationPA(dat = as.matrix(expr_values), B = iter, threshold = threshold, verbose = verbose)

    ## results ##
    nr_sign_components = jtest$r
    cat('number of estimated significant components: ', nr_sign_components, '\n')
    final_results = jtest$p
    jackplot = create_jackstrawplot(jackstraw_data = final_results, ncp = ncp, ylim = ylim, threshold = threshold)

  }

  ## print plot
  if(show_plot == TRUE) {
    print(jackplot)
  }

  ## save plot
  if(save_plot == TRUE) {
    do.call('all_plots_save_function', c(list(gobject = gobject, plot_object = jackplot, default_save_name = default_save_name), save_param))
  }

  ## return plot
  if(return_plot == TRUE) {
    return(jackplot)
  }

}


#' @title signPCA
#' @name signPCA
#' @description identify significant prinicipal components (PCs)
#' @param gobject giotto object
#' @param feat_type feature type
#' @param spat_unit spatial unit
#' @param name name of PCA object if available
#' @param method method to use to identify significant PCs
#' @param expression_values expression values to use
#' @param reduction cells or genes
#' @param pca_method which implementation to use
#' @param rev do a reverse PCA
#' @param feats_to_use subset of features to use for PCA
#' @param genes_to_use deprecated, use feats_to_use
#' @param center center data before PCA
#' @param scale_unit scale features before PCA
#' @param ncp number of principal components to calculate
#' @param scree_ylim y-axis limits on scree plot
#' @param jack_iter number of interations for jackstraw
#' @param jack_threshold p-value threshold to call a PC significant
#' @param jack_ylim y-axis limits on jackstraw plot
#' @param verbose verbosity
#' @param show_plot show plot
#' @param return_plot return ggplot object
#' @param save_plot directly save the plot [boolean]
#' @param save_param list of saving parameters from all_plots_save_function()
#' @param default_save_name default save name for saving, don't change, change save_name in save_param
#' @return ggplot object for scree method and maxtrix of p-values for jackstraw
#' @details Two different methods can be used to assess the number of relevant or significant
#'  prinicipal components (PC's). \cr
#'  1. Screeplot works by plotting the explained variance of each
#'  individual PC in a barplot allowing you to identify which PC provides a significant
#'  contribution  (a.k.a. 'elbow method'). \cr
#'  2. The Jackstraw method uses the \code{\link[jackstraw]{permutationPA}} function. By
#'  systematically permuting genes it identifies robust, and thus significant, PCs.
#'  \cr
#' @export
signPCA <- function(gobject,
                    feat_type = NULL,
                    spat_unit = NULL,
                    name = NULL,
                    method = c('screeplot', 'jackstraw'),
                    expression_values = c("normalized", "scaled", "custom"),
                    reduction = c("cells", "feats"),
                    pca_method = c('irlba', 'factominer'),
                    rev = FALSE,
                    feats_to_use = NULL,
                    genes_to_use = NULL,
                    center = T,
                    scale_unit = T,
                    ncp = 50,
                    scree_ylim = c(0,10),
                    jack_iter = 10,
                    jack_threshold = 0.01,
                    jack_ylim = c(0, 1),
                    verbose = TRUE,
                    show_plot = NA,
                    return_plot = NA,
                    save_plot = NA,
                    save_param = list(),
                    default_save_name = 'signPCA') {

  # Set feat_type and spat_unit
  spat_unit = set_default_spat_unit(gobject = gobject,
                                    spat_unit = spat_unit)
  feat_type = set_default_feat_type(gobject = gobject,
                                    spat_unit = spat_unit,
                                    feat_type = feat_type)

  # specify name to use
  if(!is.null(name)) {
    if(feat_type == 'rna') {
      name = 'pca'
    } else {
      name = paste0(feat_type,'.','pca')
    }
  }

  ## deprecated arguments
  if(!is.null(genes_to_use)) {
    feats_to_use = genes_to_use
    warning('genes_to_use is deprecated, use feats_to_use in the future \n')
  }

  # select method
  method = match.arg(method, choices = c('screeplot', 'jackstraw'))

  # select PCA method
  pca_method = match.arg(pca_method, choices = c('irlba', 'factominer'))

  # select direction of reduction
  reduction = match.arg(reduction, c('cells', 'feats'))

  # expression values to be used
  values = match.arg(expression_values, unique(c('normalized', 'scaled', 'custom', expression_values)))
  expr_values = get_expression_values(gobject = gobject,
                                      spat_unit = spat_unit,
                                      feat_type = feat_type,
                                      values = values,
                                      output = 'matrix')

  # print, return and save parameters
  show_plot = ifelse(is.na(show_plot), readGiottoInstructions(gobject, param = 'show_plot'), show_plot)
  save_plot = ifelse(is.na(save_plot), readGiottoInstructions(gobject, param = 'save_plot'), save_plot)
  return_plot = ifelse(is.na(return_plot), readGiottoInstructions(gobject, param = 'return_plot'), return_plot)


  ## subset matrix
  if(!is.null(genes_to_use)) {
    expr_values = create_feats_to_use_matrix(gobject = gobject,
                                             spat_unit = spat_unit,
                                             feat_type = feat_type,
                                             sel_matrix = expr_values,
                                             feats_to_use = feats_to_use,
                                             verbose = verbose)
  }

  # reduction of cells
  if(reduction == 'cells') {

    if(method == 'screeplot') {

      screeplot = screePlot(gobject = gobject,
                            spat_unit = spat_unit,
                            feat_type = feat_type,
                            name = name,
                            expression_values = values,
                            reduction = reduction,
                            feats_to_use = feats_to_use,
                            center = center,
                            scale_unit = scale_unit,
                            ncp = ncp,
                            rev = rev,
                            method = pca_method,
                            ylim = scree_ylim,
                            verbose = verbose,
                            show_plot = FALSE,
                            return_plot = TRUE,
                            save_plot = FALSE,
                            save_param = list(),
                            default_save_name = 'screePlot')

      ## print plot
      if(show_plot == TRUE) {
        print(screeplot)
      }

      ## save plot
      if(save_plot == TRUE) {
        do.call('all_plots_save_function', c(list(gobject = gobject, plot_object = screeplot, default_save_name = default_save_name), save_param))
      }

      ## return plot
      if(return_plot == TRUE) {
        return(screeplot)
      }


    } else if(method == 'jackstraw') {


      jackplot = jackstrawPlot(gobject = gobject,
                               spat_unit = spat_unit,
                               feat_type = feat_type,
                               expression_values = values,
                               reduction = reduction,
                               feats_to_use = feats_to_use,
                               center = center,
                               scale_unit = scale_unit,
                               ncp = ncp,
                               ylim = jack_ylim,
                               iter = jack_iter,
                               threshold = jack_threshold,
                               verbose = verbose,
                               show_plot = FALSE,
                               return_plot = TRUE,
                               save_plot = FALSE,
                               save_param = list(),
                               default_save_name = 'jackstrawPlot')

      ## print plot
      if(show_plot == TRUE) {
        print(jackplot)
      }

      ## save plot
      if(save_plot == TRUE) {
        do.call('all_plots_save_function', c(list(gobject = gobject, plot_object = jackplot, default_save_name = default_save_name), save_param))
      }

      ## return plot
      if(return_plot == TRUE) {
        return(jackplot)
      } else {
        return(jackplot) # poentially return all results instead
      }

    }

  } else {
    cat('gene reduction not yet implemented')
  }
}








## * Dim reduction algos ####
# ------------------------- #

#' @title Run UMAP dimension reduction
#' @name runUMAP
#' @description run UMAP
#' @param gobject giotto object
#' @param spat_unit spatial unit
#' @param feat_type feature type
#' @param expression_values expression values to use
#' @param reduction cells or genes
#' @param dim_reduction_to_use use another dimension reduction set as input
#' @param dim_reduction_name name of dimension reduction set to use
#' @param dimensions_to_use number of dimensions to use as input
#' @param name arbitrary name for UMAP run
#' @param feats_to_use if dim_reduction_to_use = NULL, which genes to use
#' @param genes_to_use deprecated, use feats_to_use
#' @param return_gobject boolean: return giotto object (default = TRUE)
#' @param n_neighbors UMAP param: number of neighbors
#' @param n_components UMAP param: number of components
#' @param n_epochs UMAP param: number of epochs
#' @param min_dist UMAP param: minimum distance
#' @param n_threads UMAP param: threads/cores to use
#' @param spread UMAP param: spread
#' @param set_seed use of seed
#' @param seed_number seed number to use
#' @param verbose verbosity of function
#' @param toplevel_params parameters to extract
#' @param ... additional UMAP parameters
#' @return giotto object with updated UMAP dimension reduction
#' @details See \code{\link[uwot]{umap}} for more information about these and other parameters.
#' \itemize{
#'   \item Input for UMAP dimension reduction can be another dimension reduction (default = 'pca')
#'   \item To use gene expression as input set dim_reduction_to_use = NULL
#'   \item If dim_reduction_to_use = NULL, genes_to_use can be used to select a column name of
#'   highly variable genes (see \code{\link{calculateHVF}}) or simply provide a vector of genes
#'   \item multiple UMAP results can be stored by changing the \emph{name} of the analysis
#' }
#' @export
runUMAP <- function(gobject,
                    feat_type = NULL,
                    spat_unit = NULL,
                    expression_values = c('normalized', 'scaled', 'custom'),
                    reduction = c('cells', 'feats'),
                    dim_reduction_to_use = 'pca',
                    dim_reduction_name = NULL,
                    dimensions_to_use = 1:10,
                    name = NULL,
                    feats_to_use = NULL,
                    genes_to_use = NULL,
                    return_gobject = TRUE,
                    n_neighbors = 40,
                    n_components = 2,
                    n_epochs = 400,
                    min_dist = 0.01,
                    n_threads = NA,
                    spread = 5,
                    set_seed = TRUE,
                    seed_number = 1234,
                    verbose = T,
                    toplevel_params = 2,
                    ...) {


  ## deprecated arguments
  if(!is.null(genes_to_use)) {
    feats_to_use = genes_to_use
    warning('genes_to_use is deprecated, use feats_to_use in the future \n')
  }

  # Set feat_type and spat_unit
  spat_unit = set_default_spat_unit(gobject = gobject,
                                    spat_unit = spat_unit)
  feat_type = set_default_feat_type(gobject = gobject,
                                    spat_unit = spat_unit,
                                    feat_type = feat_type)

  reduction = match.arg(reduction, choices = c('cells', 'feats'))


  # specify dim_reduction_name to use for pca input for umap
  if(!is.null(dim_reduction_to_use)) {
    if(dim_reduction_to_use == 'pca') {
      if(is.null(dim_reduction_name)) {
        if(feat_type == 'rna') {
          dim_reduction_name = 'pca'
        } else {
          dim_reduction_name = paste0(feat_type,'.','pca')
        }
      }
    }
  }



  # specify name to use for umap
  if(is.null(name)) {
    if(feat_type == 'rna') {
      name = 'umap'
    } else {
      name = paste0(feat_type,'.','umap')
    }
  }


  # set cores to use
  n_threads = determine_cores(cores = n_threads)

  ## umap on cells ##
  if(reduction == 'cells') {

    ## using dimension reduction ##
    if(!is.null(dim_reduction_to_use)) {

      ## TODO: check if reduction exists
      dimObj_to_use = get_dimReduction(gobject = gobject,
                                       spat_unit = spat_unit,
                                       feat_type = feat_type,
                                       reduction = reduction,
                                       reduction_method = dim_reduction_to_use,
                                       name = dim_reduction_name,
                                       output = 'dimObj')

      provenance = prov(dimObj_to_use)
      matrix_to_use = dimObj_to_use[]

      matrix_to_use = matrix_to_use[, dimensions_to_use]

      #print(matrix_to_use[1:2,1:2])

      #matrix_to_use = gobject@dimension_reduction[['cells']][[dim_reduction_to_use]][[dim_reduction_name]][['coordinates']][, dimensions_to_use]

    } else {

      ## using original matrix ##
      # expression values to be used
      values = match.arg(expression_values, unique(c('normalized', 'scaled', 'custom', expression_values)))

      expr_values = get_expression_values(gobject = gobject,
                                          spat_unit = spat_unit,
                                          feat_type = feat_type,
                                          values = values,
                                          output = 'exprObj')

      provenance = prov(expr_values)
      expr_values = expr_values[] # extract matrix

      ## subset matrix
      if(!is.null(feats_to_use)) {
        expr_values = create_feats_to_use_matrix(gobject = gobject,
                                                 spat_unit = spat_unit,
                                                 feat_type = feat_type,
                                                 sel_matrix = expr_values,
                                                 feats_to_use = feats_to_use,
                                                 verbose = verbose)
      }

      matrix_to_use = t_flex(expr_values)
    }

    # start seed
    if(isTRUE(set_seed)) {
      set.seed(seed = seed_number)
    }

    ## run umap ##
    uwot_clus <- uwot::umap(X = matrix_to_use, # as.matrix(matrix_to_use) necessary?
                            n_neighbors = n_neighbors,
                            n_components = n_components,
                            n_epochs = n_epochs,
                            min_dist = min_dist,
                            n_threads = n_threads,
                            spread = spread,
                            ...)

    uwot_clus_pos_DT = data.table::as.data.table(uwot_clus)

    # data.table variables
    cell_ID = NULL
    uwot_clus_pos_DT[, cell_ID := rownames(matrix_to_use)]

    # exit seed
    if(isTRUE(set_seed)) {
      set.seed(seed = Sys.time())
    }


    if(return_gobject == TRUE) {

      umap_names = list_dim_reductions_names(gobject = gobject,
                                             data_type = reduction,
                                             spat_unit = spat_unit,
                                             feat_type = feat_type,
                                             dim_type = 'umap')

      if(name %in% umap_names) {
        message('\n ', name, ' has already been used, will be overwritten \n')
      }


      coordinates = uwot_clus
      rownames(coordinates) = rownames(matrix_to_use)

      dimObject = create_dim_obj(name = name,
                                 feat_type = feat_type,
                                 spat_unit = spat_unit,
                                 reduction = reduction,
                                 provenance = provenance,
                                 reduction_method = 'umap',
                                 coordinates = coordinates,
                                 misc = NULL)


      ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
      gobject = set_dimReduction(gobject = gobject, dimObject = dimObject)
      ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###



      ## update parameters used ##
      gobject = update_giotto_params(gobject,
                                     description = '_umap',
                                     return_gobject = TRUE,
                                     toplevel = toplevel_params)
      return(gobject)


    } else {
      return(uwot_clus_pos_DT)
    }




  } else if(reduction == 'feats') {

    message('\n Feats reduction is not yet implemented \n')

  }

}


#' @title Run tSNE dimensional reduction
#' @name runtSNE
#' @description run tSNE
#' @param gobject giotto object
#' @param spat_unit spatial unit
#' @param feat_type feature type
#' @param expression_values expression values to use
#' @param reduction cells or feats
#' @param dim_reduction_to_use use another dimension reduction set as input
#' @param dim_reduction_name name of dimension reduction set to use
#' @param dimensions_to_use number of dimensions to use as input
#' @param name arbitrary name for tSNE run
#' @param feats_to_use if dim_reduction_to_use = NULL, which features to use
#' @param genes_to_use deprecated, use feats_to_use
#' @param return_gobject boolean: return giotto object (default = TRUE)
#' @param dims tSNE param: number of dimensions to return
#' @param perplexity tSNE param: perplexity
#' @param theta tSNE param: theta
#' @param do_PCA_first tSNE param: do PCA before tSNE (default = FALSE)
#' @param set_seed use of seed
#' @param seed_number seed number to use
#' @param verbose verbosity of the function
#' @param ... additional tSNE parameters
#' @return giotto object with updated tSNE dimension recuction
#' @details See \code{\link[Rtsne]{Rtsne}} for more information about these and other parameters. \cr
#' \itemize{
#'   \item Input for tSNE dimension reduction can be another dimension reduction (default = 'pca')
#'   \item To use gene expression as input set dim_reduction_to_use = NULL
#'   \item If dim_reduction_to_use = NULL, genes_to_use can be used to select a column name of
#'   highly variable genes (see \code{\link{calculateHVF}}) or simply provide a vector of genes
#'   \item multiple tSNE results can be stored by changing the \emph{name} of the analysis
#' }
#' @export
runtSNE <- function(gobject,
                    spat_unit = NULL,
                    feat_type = NULL,
                    expression_values = c('normalized', 'scaled', 'custom'),
                    reduction = c('cells', 'feats'),
                    dim_reduction_to_use = 'pca',
                    dim_reduction_name = NULL,
                    dimensions_to_use = 1:10,
                    name = NULL,
                    feats_to_use = NULL,
                    genes_to_use = NULL,
                    return_gobject = TRUE,
                    dims = 2,
                    perplexity = 30,
                    theta = 0.5,
                    do_PCA_first = FALSE,
                    set_seed = TRUE,
                    seed_number = 1234,
                    verbose = TRUE,
                    ...) {


  ## deprecated arguments
  if(!is.null(genes_to_use)) {
    feats_to_use = genes_to_use
    warning('genes_to_use is deprecated, use feats_to_use in the future \n')
  }

  # Set feat_type and spat_unit
  spat_unit = set_default_spat_unit(gobject = gobject,
                                    spat_unit = spat_unit)
  feat_type = set_default_feat_type(gobject = gobject,
                                    spat_unit = spat_unit,
                                    feat_type = feat_type)

  reduction = match.arg(reduction, choices = c('cells', 'feats'))


  # specify dim_reduction_name to use for pca input for tsne
  if(!is.null(dim_reduction_to_use)) {
    if(dim_reduction_to_use == 'pca') {
      if(is.null(dim_reduction_name)) {
        if(feat_type == 'rna') {
          dim_reduction_name = 'pca'
        } else {
          dim_reduction_name = paste0(feat_type,'.','pca')
        }
      }
    }
  }


  # specify name to use for umap
  if(is.null(name)) {
    if(feat_type == 'rna') {
      name = 'tsne'
    } else {
      name = paste0(feat_type,'.','tsne')
    }
  }




  ## tsne on cells ##
  if(reduction == 'cells') {

    ## using dimension reduction ##
    if(!is.null(dim_reduction_to_use)) {

      ## TODO: check if reduction exists
      dimObj_to_use = get_dimReduction(gobject = gobject,
                                       spat_unit = spat_unit,
                                       feat_type = feat_type,
                                       reduction = reduction,
                                       reduction_method = dim_reduction_to_use,
                                       name = dim_reduction_name,
                                       output = 'dimObj')

      provenance = prov(dimObj_to_use)
      matrix_to_use = dimObj_to_use[]
      matrix_to_use = matrix_to_use[, dimensions_to_use]

    } else {
      ## using original matrix ##
      # expression values to be used
      values = match.arg(expression_values, unique(c('normalized', 'scaled', 'custom', expression_values)))
      expr_values = get_expression_values(gobject = gobject,
                                          spat_unit = spat_unit,
                                          feat_type = feat_type,
                                          values = values,
                                          output = 'exprObj')

      provenance = prov(expr_values)
      expr_values = expr_values[] # extract matrix

      ## subset matrix
      if(!is.null(feats_to_use)) {
        expr_values = create_feats_to_use_matrix(gobject = gobject,
                                                 spat_unit = spat_unit,
                                                 feat_type = feat_type,
                                                 sel_matrix = expr_values,
                                                 feats_to_use = feats_to_use,
                                                 verbose = verbose)
      }

      matrix_to_use = t_flex(expr_values)

    }

    # start seed
    if(isTRUE(set_seed)) {
      set.seed(seed = seed_number)
    }

    ## run tSNE ##
    tsne_clus = Rtsne::Rtsne(X = matrix_to_use,
                             dims = dims,
                             perplexity = perplexity,
                             theta = theta,
                             pca = do_PCA_first, ...)

    tsne_clus_pos_DT = data.table::as.data.table(tsne_clus$Y)

    # data.table variables
    cell_ID = NULL
    tsne_clus_pos_DT[, cell_ID := rownames(matrix_to_use)]

    # exit seed
    if(isTRUE(set_seed)) {
      set.seed(Sys.time())
    }


    if(isTRUE(return_gobject)) {

      tsne_names = list_dim_reductions_names(gobject = gobject, data_type = reduction,
                                             spat_unit = spat_unit, feat_type = feat_type,
                                             dim_type = 'tsne')

      if(name %in% tsne_names) {
        cat('\n ', name, ' has already been used, will be overwritten \n')
      }


      coordinates = tsne_clus$Y
      rownames(coordinates) = rownames(matrix_to_use)

      dimObject = create_dim_obj(name = name,
                                 feat_type = feat_type,
                                 spat_unit = spat_unit,
                                 provenance = provenance,
                                 reduction = reduction,
                                 reduction_method = 'tsne',
                                 coordinates = coordinates,
                                 misc = tsne_clus)

      ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
      gobject = set_dimReduction(gobject = gobject, dimObject = dimObject)
      ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###

      ## update parameters used ##
      gobject = update_giotto_params(gobject, description = '_tsne')
      return(gobject)


    } else {
      return(tsne_clus_pos_DT)
    }


  } else if(reduction == 'feats') {

    cat('\n Not yet implemented \n')

  }

}






## * Data Integration ####
# ---------------------- #


#' @title runGiottoHarmony
#' @name runGiottoHarmony
#' @description run UMAP
#' @param gobject giotto object
#' @param spat_unit spatial unit
#' @param feat_type feature type
#' @param vars_use If meta_data is dataframe, this defines which variable(s) to
#'   remove (character vector).
#' @param do_pca Whether to perform PCA on input matrix.
#' @param expression_values expression values to use
#' @param reduction reduction on cells or features
#' @param dim_reduction_to_use use another dimension reduction set as input
#' @param dim_reduction_name name of dimension reduction set to use
#' @param dimensions_to_use number of dimensions to use as input
#' @param name arbitrary name for Harmony run
#' @param feats_to_use if dim_reduction_to_use = NULL, which feats to use
#' @param return_gobject boolean: return giotto object (default = TRUE)
#' @param toplevel_params parameters to extract
#' @param verbose be verbose
#' @param ... additional \code{\link[harmony]{HarmonyMatrix}} parameters
#' @return giotto object with updated Harmony dimension recuction
#' @details This is a simple wrapper for the HarmonyMatrix function in the Harmony package \doi{10.1038/s41592-019-0619-0}.
#' @export
runGiottoHarmony = function(gobject,
                            spat_unit = NULL,
                            feat_type = NULL,
                            vars_use = 'list_ID',
                            do_pca = FALSE,
                            expression_values = c('normalized', 'scaled', 'custom'),
                            reduction = 'cells',
                            dim_reduction_to_use = 'pca',
                            dim_reduction_name = NULL,
                            dimensions_to_use = 1:10,
                            name = NULL,
                            feats_to_use = NULL,
                            toplevel_params = 2,
                            return_gobject = TRUE,
                            verbose = NULL,
                            ...) {


  # verify if optional package is installed
  package_check(pkg_name = "harmony", repository = "CRAN")


  # print message with information #
  message("using 'Harmony' to integrate different datasets. If used in published research, please cite: \n
  Korsunsky, I., Millard, N., Fan, J. et al.
                      Fast, sensitive and accurate integration of single-cell data with Harmony.
                      Nat Methods 16, 1289-1296 (2019).
                      https://doi.org/10.1038/s41592-019-0619-0 ")


  # Set feat_type and spat_unit
  spat_unit = set_default_spat_unit(gobject = gobject,
                                    spat_unit = spat_unit)
  feat_type = set_default_feat_type(gobject = gobject,
                                    spat_unit = spat_unit,
                                    feat_type = feat_type)


  # specify dim_reduction_name to use for pca input for umap
  if(!is.null(dim_reduction_to_use)) {
    if(dim_reduction_to_use == 'pca') {
      if(is.null(dim_reduction_name)) {
        if(feat_type == 'rna') {
          dim_reduction_name = 'pca'
        } else {
          dim_reduction_name = paste0(feat_type,'.','pca')
        }
      }
    }
  }


  # specify name to use for harmony
  if(is.null(name)) {
    if(feat_type == 'rna') {
      name = 'harmony'
    } else {
      name = paste0(feat_type,'.','harmony')
    }
  }




  # set cores to use
  #n_threads = determine_cores(cores = n_threads)


  ## using dimension reduction ##
  if(!is.null(dim_reduction_to_use)) {

    ## TODO: check if reduction exists
    matrix_to_use = get_dimReduction(gobject = gobject,
                                     spat_unit = spat_unit,
                                     feat_type = feat_type,
                                     reduction = reduction, # set to spat_unit?
                                     reduction_method = dim_reduction_to_use,
                                     name = dim_reduction_name,
                                     output = 'dimObj')
    provenance = prov(matrix_to_use)
    matrix_to_use = matrix_to_use[]

    matrix_to_use = matrix_to_use[, dimensions_to_use]

  } else {

    ## using original matrix ##
    # expression values to be used
    values = match.arg(expression_values, unique(c('normalized', 'scaled', 'custom', expression_values)))
    expr_values = get_expression_values(gobject = gobject,
                                        spat_unit = spat_unit,
                                        feat_type = feat_type,
                                        values = values,
                                        output = 'exprObj')

    provenance = prov(expr_values)
    expr_values = expr_values[] # extract matrix


    ## subset matrix
    if(!is.null(feats_to_use)) {
      expr_values = create_feats_to_use_matrix(gobject = gobject,
                                               feat_type = feat_type,
                                               spat_unit = spat_unit,
                                               sel_matrix = expr_values,
                                               feats_to_use = feats_to_use,
                                               verbose = verbose)
    }

    matrix_to_use = t_flex(expr_values)
  }

  # get metadata
  metadata = pDataDT(gobject, feat_type = feat_type, spat_unit = spat_unit)

  # run harmony
  harmony_results = harmony::HarmonyMatrix(data_mat = matrix_to_use,
                                           meta_data = metadata,
                                           vars_use = vars_use,
                                           do_pca = do_pca,
                                           ...)

  colnames(harmony_results) =  paste0('Dim.', 1:ncol(harmony_results))
  rownames(harmony_results) = rownames(matrix_to_use)

  harmdimObject = create_dim_obj(name = name,
                                 spat_unit = spat_unit,
                                 feat_type = feat_type,
                                 provenance = provenance,
                                 reduction = 'cells', # set to spat_unit?
                                 reduction_method = 'harmony',
                                 coordinates = harmony_results,
                                 misc = NULL)

  # return giotto object or harmony results
  if(return_gobject == TRUE) {

    #harmony_names = names(gobject@dimension_reduction[['cells']][[spat_unit]][['harmony']])

    harmony_names = list_dim_reductions_names(gobject = gobject,
                                              data_type = reduction,
                                              spat_unit = spat_unit,
                                              feat_type = feat_type,
                                              dim_type = 'harmony')

    if(name %in% harmony_names) {
      cat('\n ', name, ' has already been used with harmony, will be overwritten \n')
    }

    ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###
    gobject = set_dimReduction(gobject = gobject, dimObject = harmdimObject)
    ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ### ###


    ## update parameters used ##
    gobject = update_giotto_params(gobject,
                                   description = '_harmony',
                                   return_gobject = TRUE,
                                   toplevel = toplevel_params)
    return(gobject)

  } else {
    return(harmdimObject)
  }

}
drieslab/Giotto_site_suite documentation built on April 26, 2023, 11:51 p.m.