R/dimentionality-reduction.R

Defines functions run_umap run_tsne run_pca run_dr.Seurat run_dr.default run_dr

Documented in run_dr run_pca run_tsne run_umap

#' Run dimensionality reduction, pca, tse, and umap
#'
#' @param data Data to use for dimensionality reduction.
#' @param dr_method Dimensionality reduction method.
#' @param prefix .
#' @param assay .
#' @param var_features .
#' @param features .
#' @param graph .
#' @param num_dim_use .
#' @param reduction .
#' @param num_neighbors .
#' @param num_pcs .
#'
#' @return list of dimensionality reduced/
#'
#' @import dplyr
#' @importFrom Seurat Embeddings DefaultAssay GetAssayData VariableFeatures
#' @export
#' @seealso run_pca(), run_tsne(), run_umap()
run_dr <- function(data, dr_method, prefix, assay = NULL,
                   var_features = FALSE, features = NULL,
                   graph = NULL, num_dim_use = NULL, reduction = NULL,
                   num_neighbors = NULL, num_pcs = NULL, ...) {
  UseMethod("run_dr")
}

#' @export
run_dr.default <- function(data, dr_method = c("pca", "tsne", "umap"), prefix, assay = NULL,
                           var_features = FALSE, features = NULL,
                           graph = NULL, num_dim_use = NULL, reduction = NULL,
                           num_neighbors = NULL, num_pcs = NULL, ...) {
  dr_method <- match.arg(dr_method)

  if (dr_method == "pca") {
    out <- run_pca(data = data, prefix = prefix, num_pcs = num_pcs)
  } else if (dr_method == "tsne") {
    out <- run_tsne(data = data, prefix = prefix)
  } else if (dr_method == "umap") {
    out <- run_umap(data = data, prefix = prefix, num_neighbors = num_neighbors)
  }
}

#' @export
run_dr.Seurat <- function(data, dr_method, prefix, assay = NULL,
                          var_features = FALSE, features = NULL,
                          graph = NULL, num_dim_use = NULL, reduction = NULL,
                          num_neighbors = NULL, num_pcs = NULL, ...) {
  if (var_features) {
    features <- VariableFeatures(data[[assay]])
    data.use <- t(GetAssayData(
      object = data,
      slot = "scale.data",
      assay = assay
    )[features, ])
    # if the features are specified, get features from the specified assay
  } else if (!is.null(features)) {
    data.use <- t(GetAssayData(
      object = data,
      slot = "scale.data",
      assay = assay
    )[features, ])
    # if the number of dimensions is specified, use those dimensions from pca
  } else if (!is.null(num_dim_use)) {
    data.use <- Embeddings(data[[reduction]])[, 1:num_dim_use]
    assay <- DefaultAssay(object = data[[reduction]])

    # if a graph is specified, use the graph
  } else if (!is.null(graph)) {
    data.use <- data[[graph]]
  } else {
    stop("Please specify one of num_dim_use, features, or graph")
  }

  # run the specified dimensionality reduction method with the specified data
  dim_reduction <- run_dr(
    data = data.use, dr_method = dr_method,
    prefix = prefix, assay = assay,
    var_features = var_features, features = features,
    graph = graph, num_dim_use = num_dim_use, reduction = reduction,
    num_neighbors = num_neighbors, num_pcs = num_pcs, ...
  )

  if (dr_method == "pca") {
    cell.embeddings <- as.matrix(dim_reduction$cell.embeddings)
    colnames(cell.embeddings) <- paste0("PC", prefix, 1:ncol(cell.embeddings))
    feature.loadings <- as.matrix(dim_reduction$feature.loadings)
    colnames(feature.loadings) <- paste0("PC", prefix, 1:ncol(feature.loadings))
    dim_red_class <- new(
      Class = "DimReduc",
      cell.embeddings = cell.embeddings,
      feature.loadings = feature.loadings,
      assay.used = assay,
      stdev = dim_reduction$sdev,
      key = paste0("PC", prefix)
    )
  } else if (dr_method == "tsne") {
    cell.embeddings <- as.matrix(dim_reduction)
    colnames(cell.embeddings) <- paste0("tSNE", prefix, 1:ncol(cell.embeddings))
    dim_red_class <- new(
      Class = "DimReduc",
      cell.embeddings = cell.embeddings,
      assay.used = assay,
      key = paste0("tSNE", prefix)
    )
  } else if (dr_method == "umap") {
    cell.embeddings <- as.matrix(dim_reduction)
    colnames(cell.embeddings) <- paste0("UMAP", prefix, 1:ncol(cell.embeddings))
    dim_red_class <- new(
      Class = "DimReduc",
      cell.embeddings = cell.embeddings,
      assay.used = assay,
      key = paste0("UMAP", prefix)
    )
  }
  data[[glue("{dr_method}{prefix}")]] <- dim_red_class

  return(data)
}

#' Run PCA
#'
#' @param data A tibble with metadata.
#' @param num_pcs Maximim number of genes per cell.
#' @param prefix suffix.
#'
#' @return named list of feature loadings, cell embeddings, sdev, output from pca
#'
#' @importFrom irlba prcomp_irlba
#' @export
run_pca <- function(data, num_pcs, prefix = "PC_") {

  # make sure the number of pcs isn't too large
  npcs <- min(num_pcs, nrow(data))

  # calculate PCA
  pca_out <- irlba::prcomp_irlba(data, n = npcs, verbose = FALSE)

  # Extract information
  feature.loadings <- pca_out$rotation
  sdev <- pca_out$sdev
  total.variance <- sum(sdev)
  cell.embeddings <- pca_out$x

  rownames(feature.loadings) <- colnames(data)
  colnames(feature.loadings) <- paste0(prefix, 1:npcs)
  rownames(cell.embeddings) <- rownames(data)
  colnames(cell.embeddings) <- paste0(prefix, 1:npcs)

  return(list(
    feature.loadings = feature.loadings,
    cell.embeddings = cell.embeddings,
    sdev = sdev,
    pca_out = pca_out
  ))
}

#' Run TSNE
#'
#' @param data Data to run tsne on.
#' @param seed.use seed to use.
#' @param dim.embed Number of tsne embeddings to return.
#' @param prefix suffix.
#'
#' @return tsne.
#'
#' @import Rtsne
#'
#' @export
run_tsne <- function(data, seed.use = 1, dim.embed = 2, prefix = "tSNE_") {
  set.seed(seed = seed.use)
  # run tsne
  tsne.data <- Rtsne(data, dims = dim.embed)$Y

  colnames(x = tsne.data) <- paste0(prefix, 1:ncol(x = tsne.data))
  rownames(x = tsne.data) <- rownames(x = data)

  return(tsne.data)
}


#' Run UMAP
#'
#' @param data Data to run UMAP on.
#' @param num_neighbors Number of neighbors.
#' @param min_dist Distance metric.
#' @param prefix Prefix.
#'
#' @return umap
#'
#' @importFrom uwot umap
#' @export
run_umap <- function(data, num_neighbors, min_dist = 0.3, prefix = "UMAP_") {
  # run umap
  umap_out <- umap(data, n_neighbors = num_neighbors, min_dist = min_dist, verbose = FALSE)

  colnames(umap_out) <- paste0(
    prefix,
    1:ncol(umap_out)
  )

  rownames(umap_out) <- rownames(data)

  return(umap_out)
}
igordot/scooter documentation built on Nov. 20, 2023, 5:55 a.m.