R/Clustering_functions.R
In CIARA: Cluster Independent Algorithm for Rare Cell Types Identification

Documented in cluster_analysis_integrate_rare find_resolution markers_cluster_seurat merge_cluster white_black_markers

#' cluster_analysis_integrate_rare
#'
#' @param raw_counts Raw count matrix (n_genes X n_cells).
#' @param project_name Character name of the Seurat project.
#' @param resolution Numeric value specifying the parameter \emph{resolution}
#' used in the Seurat function \emph{FindClusters}.
#' @param neighbors Numeric value specifying the parameter \emph{k.param} in
#' the Seurat function \emph{FindNeighbors}
#' @param max_dimension Numeric value specifying the maximum number of the PCA
#' dimensions used in the parameter \emph{dims} for the Seurat function
#' \emph{FindNeighbors}
#' @param feature_genes vector of features specifying the argument
#' \emph{features} in the Seurat function \emph{RunPCA}.
#' @return Seurat object including raw and normalized counts matrices, UMAP coordinates and cluster result.
#' @author Gabriele Lubatti \email{gabriele.lubatti@@helmholtz-muenchen.de}
#' @seealso
#' \url{https://www.rdocumentation.org/packages/Seurat/versions/4.0.1/topics/FindClusters}
#' \url{https://www.rdocumentation.org/packages/Seurat/versions/4.0.1/topics/FindNeighbors}
#' \url{https://www.rdocumentation.org/packages/Seurat/versions/4.0.1/topics/RunPCA}
#' @export cluster_analysis_integrate_rare
cluster_analysis_integrate_rare <- function(raw_counts, project_name, resolution, neighbors, max_dimension, feature_genes = NULL) {
  if (!(requireNamespace("Seurat", quietly = TRUE))) {
    stop("Package Seurat needed for this function to work. Please install it: install.packages('Seurat')")
  }
  nFeature_RNA <- NULL
  seurat_object <- Seurat::CreateSeuratObject(counts = raw_counts, project = project_name)
  seurat_object <- subset( seurat_object, subset = nFeature_RNA > 0)
  seurat_object <- Seurat::NormalizeData( seurat_object, verbose = FALSE)
  seurat_object <- Seurat::FindVariableFeatures( seurat_object, selection.method = "vst", nfeatures = 2000)
  seurat_object <- Seurat::ScaleData( seurat_object, verbose = FALSE)
  seurat_object <- Seurat::RunPCA( seurat_object, npcs = max_dimension, verbose = FALSE, features = feature_genes)
  seurat_object <- Seurat::RunUMAP( seurat_object, reduction = "pca", dims = 1:20, set.seed = 42)
  seurat_object <- Seurat::FindNeighbors( seurat_object, reduction = "pca", dims = 1:max_dimension, k.param = neighbors)
  seurat_object <- Seurat::FindClusters( seurat_object, resolution = resolution)
  return(seurat_object)}





#' find_resolution
#'
#' @param seurat_object Seurat object as returned by
#' \emph{cluster_analysis_integrate_rare}
#' @param resolution_vector vector with all values of resolution for which the
#' Seurat function \emph{FindClusters} is run
#' @return Clustree object showing the connection between clusters obtained at different level of resolution as specified in \emph{resolution_vector}.
#' @author Gabriele Lubatti \email{gabriele.lubatti@@helmholtz-muenchen.de}
#' @seealso \url{https://CRAN.R-project.org/package=clustree}
#'
#' @export find_resolution
find_resolution <- function(seurat_object, resolution_vector) {
  if (!(requireNamespace("Seurat", quietly = TRUE) & requireNamespace("clustree", quietly = TRUE))) {
    stop("Package Seurat and clustree needed for this function to work. Please install them: install.packages('Seurat') and install.packages('clustree')")
  }
  for (i in resolution_vector) {
    seurat_object <- Seurat::FindClusters(seurat_object, resolution = i)
  }
  clustree::clustree(seurat_object)
}

#' @importFrom ggraph guide_edge_colourbar
#' @export
ggraph::guide_edge_colourbar



#' build_fisher_test
#' @noRd
build_fisher_test <- function(a, b, c) {
  matrix_test <- matrix(c(sum(c[c%in%a]%in%c[c%in%b]), sum(c[c%in%a]%in%c[!c%in%b]), sum(c[!c%in%a]%in%c[c%in%b]), sum(c[!c%in%a]%in%c[!c%in%b])), nrow = 2, ncol = 2, byrow = T)
  fisher.test(matrix_test)
}






#' de_seurat_cluster
#' @noRd
de_seurat_cluster <- function(seurat_object, cluster, names_cell, max_p_value) {
  level <- levels(as.factor(cluster))
  final_markers <- vector("list", length(level))
  for (i in 1:length(level)) {
    markers <- Seurat::FindMarkers(seurat_object, ident.1 = names_cell[cluster == level[i]], ident.2 = names_cell[cluster!=level[i]], only.pos = T)
    markers_new <- markers[markers$p_val_adj <= max_p_value, ]
    markers_new <- markers_new[order(markers_new$p_val_adj), ]
    markers_final <- row.names(markers_new)
    final_markers[[i]] <- markers_final
  }
  return(final_markers)
}











#' merge_cluster
#'
#' @param old_cluster original cluster assignment that need to be updated
#' @param new_cluster new cluster assignment that need to be integrated with
#' \emph{old_cluster}.
#' @param max_number Threshold in size for clusters in \emph{new_cluster}. Only
#' cluster with number of cells smaller than \emph{max_number} will be
#' integrated in \emph{old cluster}. If \emph{max_number} is NULL, then all the clusters in \emph{new_cluster} are integrated in \emph{old cluster}.
#' @return Numeric vector of length equal to \emph{old_cluster} showing the merged cluster assignment between \emph{old cluster} and \emph{new_cluster}.
#' @author Gabriele Lubatti \email{gabriele.lubatti@@helmholtz-muenchen.de}
#'
#' @export merge_cluster
merge_cluster <- function(old_cluster, new_cluster, max_number = NULL) {

  if(is.null(max_number)) {
    cluster_final <- old_cluster
    cluster_final[match(names(new_cluster),names(old_cluster))] <- new_cluster
    return(cluster_final)
  }
  else{
    index_small <- which(table(new_cluster)<max_number)
    cluster_small <- names(table(new_cluster))[index_small]
    if (length(cluster_small) > 0) {
      message(paste0("Clusters with number of cells below ", max_number, " in the new partition: ", length(cluster_small)))
      cluster_final <- old_cluster
    for (i in 1:length(cluster_small)) {
      cluster_final[as.vector(new_cluster) == cluster_small[i]] <- paste(cluster_small[i], "step_2", sep = "-")
    }
    return(cluster_final)
    }
    else {
      warning(paste0("There are not clusters with number of cells below ", max_number))
      cluster_final <- old_cluster
      return(cluster_final)
    }
  }
}





#' test_hvg
#'
#' For each cluster in \emph{cluster}, HVGs are defined with
#' Seurat function \emph{FindVariableFeatures}. A Fisher test is performed to
#' see if there is a statistically significant enrichment between the top
#' \emph{number_hvg} and the \emph{localized_genes}
#'
#' @param cluster Vector of length equal to the number of cells, with cluster
#' assignment.
#' @param localized_genes Character vector with localized genes detected by CIARA.
#' @param background Character vector with all the genes names to use as
#' background for the Fisher test.
#' @param number_hvg Integer value. Number of top HVGs provided by the Seurat
#' function \emph{FindVariableFeatures}.
#' @param min_p_value Threshold on p values provided by Fisher test.
#' @inheritParams cluster_analysis_integrate_rare
#' @return A list with two elements. \item{first element}{The first one is a
#' list with length equal to the number of clusters. Each entry is list of
#' three elements. The first two elements contain the p value and the odds
#' ration given by the Fisher test The third is a vector with genes names that
#' are present both in \emph{localized_genes} and in top \emph{number_hvg} HVGs
#' .} \item{second element}{a character vector with the name of the cluster that
#' have a p value smaller than \emph{min_p_value}.}
#' @author Gabriele Lubatti \email{gabriele.lubatti@@helmholtz-muenchen.de}
#' @seealso
#' \url{https://www.rdocumentation.org/packages/stats/versions/3.6.2/topics/fisher.test}
#' @export test_hvg
test_hvg <- function (raw_counts, cluster, localized_genes, background, number_hvg, min_p_value)
{
  if (!(requireNamespace("Seurat", quietly = TRUE))) {
    stop("Package Seurat needed for this function to work. Please install it: install.packages('Seurat')")
  }
  nFeature_RNA <- NULL
  levels_cluster <- levels(as.factor(cluster))
  final_p_value <- rep(list(0), length(levels))
  for (i in 1:length(levels_cluster)) {
    raw_counts_small <- raw_counts[, cluster == levels_cluster[i]]
    seurat_object <- Seurat::CreateSeuratObject(counts = raw_counts_small, project = "sub_cluster")
    seurat_object <- subset(seurat_object, subset = nFeature_RNA > 0)
    seurat_object <- Seurat::NormalizeData(seurat_object, verbose = FALSE)
    seurat_object <- Seurat::FindVariableFeatures(seurat_object, selection.method = "vst", nfeatures = number_hvg)
    hvg_genes <- seurat_object@assays$RNA@var.features
    fisher_output <- build_fisher_test(localized_genes, hvg_genes, background)
    final_p_value[[i]] <- list(fisher_output[[1]], fisher_output[[3]], localized_genes[localized_genes %in% hvg_genes])
  }
  names(final_p_value) <- levels_cluster
  select_sub <- lapply(final_p_value, function(x) {if(x[[1]]<min_p_value & x[[2]] > 1) {
    return(TRUE)}
    else{return(FALSE)}
  })
  final_sub <- names(select_sub)[select_sub == TRUE]
  return(list(final_p_value, final_sub))
}
















#' cluster_analysis_sub
#'
#' @param name_cluster Character.Name of the original cluster for which the sub
#' clustering is done.
#' @inheritParams cluster_analysis_integrate_rare
#' @return Seurat object including raw and normalized counts matrices and cluster result.
#' @author Gabriele Lubatti \email{gabriele.lubatti@@helmholtz-muenchen.de}
#' @seealso
#' \url{https://www.rdocumentation.org/packages/Seurat/versions/4.0.1/topics/RunPCA}
#' \url{https://www.rdocumentation.org/packages/Seurat/versions/4.0.1/topics/FindVariableFeatures}
#' @export cluster_analysis_sub
cluster_analysis_sub <- function (raw_counts, resolution, neighbors, max_dimension, name_cluster) {
  if (!(requireNamespace("Seurat", quietly = TRUE))) {
    stop("Package Seurat needed for this function to work. Please install it: install.packages('Seurat')")
  }
  nFeature_RNA <- NULL
  seurat_object <- Seurat::CreateSeuratObject(counts = raw_counts, project = "sub_cluster")
  seurat_object <- subset(seurat_object, subset = nFeature_RNA > 0)
  seurat_object <- Seurat::NormalizeData(seurat_object, verbose = FALSE)
  seurat_object <- Seurat::FindVariableFeatures(seurat_object, selection.method = "vst", nfeatures = 2000)
  seurat_object <- Seurat::ScaleData(seurat_object, verbose = FALSE)
  hvg_genes <- seurat_object@assays$RNA@var.features
  genes_cluster <- hvg_genes
  seurat_object <- Seurat::RunPCA(seurat_object, npcs = max_dimension, verbose = FALSE, features = genes_cluster)
  seurat_object <- Seurat::FindNeighbors(seurat_object, reduction = "pca", dims = 1:max_dimension, k.param = neighbors)
  seurat_object <- Seurat::FindClusters(seurat_object, resolution = resolution)
  new_cluster <- as.vector(seurat_object$seurat_clusters)
  for (i in 1:length(levels(seurat_object$seurat_clusters))) {
    new_cluster[as.vector(seurat_object$seurat_clusters) == levels(seurat_object$seurat_clusters)[i]] <- paste(name_cluster,
                                                                                                               new_cluster[as.vector(seurat_object$seurat_clusters) == levels(seurat_object$seurat_clusters)[i]], sep = "_")
  }
  names(new_cluster) <- names(seurat_object$seurat_clusters)
  seurat_object$seurat_clusters <- new_cluster
  return(seurat_object)
}







#' markers_cluster_seurat
#'
#' The Seurat function \emph{FindMarkers} is used to identify general marker
#' for each cluster (specific cluster vs all other cluster). This list of
#' markers is then filtered keeping only the genes that appear as markers in a
#' unique cluster.
#'
#' @param seurat_object Seurat object as returned by
#' \emph{cluster_analysis_sub} or by \emph{cluster_analysis_integrate_rare}.
#' @param cell_names Vector of length equal to the number of cells, with cell
#' names.
#' @param number_top Integer. Number of top marker genes to keep for each
#' cluster.
#' @inheritParams test_hvg
#' @return List of three elements. The first is a vector with \emph{number_top}
#' marker genes for each cluster. The second is a vector with \emph{number_top}
#' marker genes and corresponding cluster. The third element is a vector with
#' all marker genes for each cluster.
#'
#' @author Gabriele Lubatti \email{gabriele.lubatti@@helmholtz-muenchen.de}
#' @seealso \url{https://www.rdocumentation.org/packages/Seurat/versions/4.0.1/topics/FindMarkers}
#'
#' @export markers_cluster_seurat
markers_cluster_seurat <- function(seurat_object, cluster, cell_names, number_top) {
  if (!(requireNamespace("Seurat", quietly = TRUE))) {
    stop("Package Seurat needed for this function to work. Please install it: install.packages('Seurat')")
  }
  level <- levels(as.factor(cluster))
  marker_cluster <- as.list(rep(0, length(level)))
  marker_all <- de_seurat_cluster(seurat_object, cluster, cell_names, 0.05)

  for (i in 1:length(level)) {
    marker_cluster[[i]] <- marker_all[[i]]
    message(paste0("Cluster ", level[i]))
  }
  marker_all <- unlist(marker_cluster)
  marker_all <- marker_all[Biobase::isUnique(marker_all)]

  for (i in 1:length(level)) {
    marker_cluster[[i]] <- marker_cluster[[i]][marker_cluster[[i]]%in%marker_all]
  }
  marker_to_see <- as.list(rep(0, length(level)))
  marker_complete <- as.list(rep(0, length(level)))
  for (i in 1:length(level)) {
    if (length(marker_cluster[[i]]) >= number_top) {
      marker_to_see[[i]] <- marker_cluster[[i]][1:number_top]
      names(marker_to_see[[i]]) <- rep(level[i], length(marker_to_see[[i]]))
      marker_complete[[i]] <- marker_cluster[[i]]
      names(marker_complete[[i]]) <- rep(level[i], length(marker_complete[[i]]))
    }
    if ((length(marker_cluster[[i]]) > 0)&(length(marker_cluster[[i]])<number_top)) {
      marker_to_see[[i]] <- marker_cluster[[i]][1:length(marker_cluster[[i]])]
      names(marker_to_see[[i]]) <- rep(level[i], length(marker_to_see[[i]]))
      marker_complete[[i]] <- marker_cluster[[i]]
      names(marker_complete[[i]]) <- rep(level[i], length(marker_complete[[i]]))
    }
    if (length(marker_cluster[[i]]) == 0) {
      marker_to_see[[i]] <- NULL
      marker_complete[[i]] <- NULL
    }
  }



  marker_top <- unlist(marker_to_see)
  marker_top <- marker_top[marker_top!=0]
  marker_complete <- unlist(marker_complete)
  marker_complete <- marker_complete[marker_complete!=0]
  marker_all_cluster_sing <- as.list(rep(0, length(level)))


  for (i in 1:length(level)) {
    if(sum(names(marker_top) == level[i]) > 0) {
      marker_all_cluster_sing[[i]] <- paste(marker_top[names(marker_top) == level[i]], level[i], sep="_")
    }
    else{}
  }
  marker_all_cluster <- unlist(marker_all_cluster_sing)
  marker_all_cluster <- marker_all_cluster[marker_all_cluster!=0]

  return(list(marker_top, marker_all_cluster, marker_complete))}







#' white_black_markers
#'
#' A white-marker is a gene whose median expression across cells belong to
#' \emph{single_cluster} is greater than \emph{threshold} and in all the other
#' clusters is equal to zero.
#'
#' @param single_cluster Character. Label of one specify cluster
#' @param marker_list Third element of the output list as returned by the
#' function \emph{markers_cluster_seurat}
#' @param threshold Numeric. The median of the genes across cells belong to
#' \emph{single_cluster} has to be greater than \emph{threshold} in order to be
#' consider as a white-black marker for \emph{single_cluster}
#' @inheritParams test_hvg
#' @inheritParams cluster_analysis_integrate_rare
#' @inheritParams plot_genes_sum
#' @return Logical vector of length equal to \emph{marker_list}, with
#' TRUE/FALSE if the gene is/is not a white-black marker for
#' \emph{single_cluster}.
#' @author Gabriele Lubatti \email{gabriele.lubatti@@helmholtz-muenchen.de}
#'
#' @export white_black_markers
white_black_markers <- function(cluster, single_cluster, norm_counts, marker_list, threshold = 0) {
  white_black <- apply(norm_counts[marker_list[names(marker_list) == single_cluster], ], 1, function(x) {
  mean_one <- median(x[cluster == single_cluster])
  mean_different <- rep(0, length(levels(factor(cluster[cluster!=single_cluster]))))
  for ( i in 1:length(levels(factor(cluster[cluster!=single_cluster])))) {
    mean_different[i] <- median(x[cluster == levels(factor(cluster[cluster!=single_cluster]))[i]])}
  if (mean_one > threshold&sum(mean_different) == 0) {
    return(TRUE)
  }
  else{
    return(FALSE)
    }
  })
  return(white_black)
}

Any scripts or data that you put into this service are public.

CIARA documentation built on March 18, 2022, 6:48 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

CIARA
Cluster Independent Algorithm for Rare Cell Types Identification

R/Clustering_functions.R
In CIARA: Cluster Independent Algorithm for Rare Cell Types Identification

Defines functions white_black_markers markers_cluster_seurat merge_cluster de_seurat_cluster build_fisher_test find_resolution cluster_analysis_integrate_rare

Documented in cluster_analysis_integrate_rare find_resolution markers_cluster_seurat merge_cluster white_black_markers

Try the CIARA package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

CIARA Cluster Independent Algorithm for Rare Cell Types Identification

R/Clustering_functions.R In CIARA: Cluster Independent Algorithm for Rare Cell Types Identification

Defines functions white_black_markers markers_cluster_seurat merge_cluster de_seurat_cluster build_fisher_test find_resolution cluster_analysis_integrate_rare

Documented in cluster_analysis_integrate_rare find_resolution markers_cluster_seurat merge_cluster white_black_markers

Try the CIARA package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

CIARA
Cluster Independent Algorithm for Rare Cell Types Identification

R/Clustering_functions.R
In CIARA: Cluster Independent Algorithm for Rare Cell Types Identification