R/clustering.flow.R

Defines functions clustering.flow

Documented in clustering.flow

#' Clustering FCS data
#'
#' This function offers multiple automatic clustering methods for a \code{FCS.SCE} object. Those methods available are:\enumerate{
#'           \item SOM: Self-Organizing Map (SOM) based on \href{https://github.com/SofieVG/FlowSOM}{\code{FlowSOM} package}.
#'           \item Phenograph: An implementation of PhenoGraph algorithm (\href{https://github.com/JinmiaoChenLab/Rphenograph}{here} for more information).
#'           \item Seurat: Clustering method based on \href{https://satijalab.org/seurat/}{Seurat}'s single-cell analysis.
#'           \item PARC: R implementation for Phenotyping by Accelerated Refined Community-partitioning (PARC) method, see \href{https://github.com/ShobiStassen/PARC}{Python module}.}
#' @param fcs.SCE A \code{fcs.SCE} object generated through \code{\link[FlowCT:fcs.SCE]{FlowCT::fcs.SCE()}}.
#' @param assay.i Name of matrix stored in the \code{fcs.SCE} object from which calculate SOM clustering. Default = \code{"normalized"}.
#' @param method What method should be used for clustering purposes. Available ones are "SOM", "Phenograph", "Seurat" and "PARC".
#' @param scale Should be data be scale before SOM clustering? (only available for this method). Default = \code{"FALSE"}.
#' @param markers.to.use Markers used for considering within the clustering calculation. Default = \code{"all"}.
#' @param num.k Number of clusters to calculate for methods "SOM" and "Phenograph".
#' @param seurat.res Seurat's resolution to calculate clustering (it indicates the ‘granularity’ of the downstream clustering, with increased values leading to a greater number of clusters). Default = \code{0.4}.
#' @param seurat.dims Number of dimensions to calculated with Seurat's method. Default = \code{1:10}.
#' @keywords SOM Seurat PARC Phenograph unsupervised clustering
#' @export clustering.flow
#' @import reticulate
#' @importFrom SingleCellExperiment colData
#' @importFrom SummarizedExperiment assay
#' @examples
#' \dontrun{
#' fcs <- clustering.flow(fcs, method = "som", num.k = 20)
#' fcs <- clustering.flow(fcs, method = "phenograph", num.k = 40)
#' fcs <- clustering.flow(fcs, method = "Seurat", seurat.res = 0.5)
#' fcs <- clustering.flow(fcs, method = "parc", assay.i = "transformed")
#' }

## setting colData method from SummarizedExperiment
setGeneric("colData<-", getGeneric("colData<-", package = "SummarizedExperiment"))

clustering.flow <- function(fcs.SCE, assay.i = "normalized", method, scale = FALSE, markers.to.use = "all", num.k, seurat.res = 0.4, seurat.dims = 1:10){
  set.seed(333)

  if(tolower(method) == "som"){
    if(!requireNamespace(c("FlowSOM", "ConsensusClusterPlus"), quietly = TRUE)) stop("Packages \"FlowSOM\" and \"ConsensusClusterPlus\" needed for this function to work. Please install them.", call. = FALSE)

    data <- as.flowSet.SE(fcs.SCE, assay.i)
    if(length(markers.to.use) == 1 && markers.to.use == "all") markers.to.use <- rownames(data)

    fsom <- suppressMessages(FlowSOM::ReadInput(data, transform = FALSE, scale = scale)) #read data
    fsom <- suppressMessages(FlowSOM::BuildSOM(fsom, colsToUse = markers.to.use)) #build SOM

    ## metaclustering
    mc <- suppressMessages(ConsensusClusterPlus::ConsensusClusterPlus(t(fsom$map$codes), maxK = num.k, reps = 100,
                                                pItem = 0.9, pFeature = 1, title = "consensus_plots", plot = "pdf",
                                                clusterAlg = "hc", innerLinkage = "average", finalLinkage = "average",
                                                distance = "euclidean", seed = 333, verbose = F))
    unlink("consensus_plots", recursive = TRUE)

    code_clustering1 <- mc[[num.k]]$consensusClass %>% as.factor() #get cluster ids for each cell
    cell_clustering1 <- code_clustering1[fsom$map$mapping[,1]]

    aux_colname <- make.unique(c(colnames(colData(fcs.SCE)), c(paste0("SOM.k", num.k))))[ncol(colData(fcs.SCE))+1]
    colData(fcs.SCE)[,aux_colname] <- cell_clustering1
    return(fcs.SCE)
  }else if(tolower(method) == "seurat"){
    if(!requireNamespace("Seurat", quietly = TRUE)) stop("Package \"Seurat\" needed for this function to work. Please install it.", call. = FALSE) else require(Seurat)

    data <- as.Seurat(fcs.SCE, counts = assayNames(fcs.SCE)[1], data = assay.i)
    if(length(markers.to.use) == 1 && markers.to.use == "all") markers.to.use <- rownames(data)

    s <- ScaleData(data, verbose = FALSE)
    s <- RunPCA(s, features = markers.to.use, verbose = FALSE)
    s <- FindNeighbors(s, dims = seurat.dims)
    s <- FindClusters(s, resolution = seurat.res)

    aux_colname <- make.unique(c(colnames(colData(fcs.SCE)), c(paste0("seurat.r", seurat.res))))[ncol(colData(fcs.SCE))+1]
    colData(fcs.SCE)[,aux_colname] <- as.factor(as.numeric(as.character(s$seurat_clusters))+1)
    return(fcs.SCE)
  }else if(tolower(method) == "phenograph"){
    if(!requireNamespace("Rphenograph", quietly = TRUE)) stop("Packages \"Rphenograph\" needed for this function to work. Please install it.", call. = FALSE)

    data <- as.matrix(t(assay(fcs.SCE, assay.i)))
    if(length(markers.to.use) == 1 && markers.to.use == "all") markers.to.use <- colnames(data)

    Rphenograph_out <- Rphenograph::Rphenograph(data[,markers.to.use], k = num.k)

    aux_colname <- make.unique(c(colnames(colData(fcs.SCE)), c(paste0("phenog.k", num.k))))[ncol(colData(fcs.SCE))+1]
    colData(fcs.SCE)[,aux_colname] <- factor(Rphenograph_out[[2]]$membership)
    return(fcs.SCE)
  }else if(tolower(method) == "parc"){
    if(!requireNamespace("reticulate", quietly = TRUE)) stop("Package \"reticulate\" needed for this function to work. Please install it.", call. = FALSE) else require(reticulate)

    p <- import("parc")
    pres <- p$PARC(t(assay(fcs.SCE, assay.i)))
    pres$run_PARC()
    fcs.SCE$PARC <- as.factor(as.numeric(as.character(unlist(pres$labels)))+1)
    return(fcs.SCE)
  }else{
    stop("Please, indicate a valid method: SOM, Seurat, Phenograph or PARC", call. = F)
  }
}
jgarces02/FlowCT documentation built on March 28, 2023, 12:42 p.m.