#################################################################
## DEFINITION OF A SPECIFIC CLASS OBJECT : ClusterSet
#################################################################
#' @title
#' ClusterSet-class
#' @description
#' This class is a representation of a partitioning algorithm and is intented to store gene clusters.
#' @slot data A matrix containing the filtered and partitioned data.
#' @slot gene_clusters A list contains the partitioned genes of the dataset. Each element of the list corresponds to a cluster, and contains the indices of the genes assigned to that cluster.
#' @slot top_genes A list contains the top genes from the gene clusters. Each element of the list corresponds to a cluster, and contains the indices of the genes assigned to that cluster ranked by their correlation value within their cluster.
#' @slot gene_clusters_metadata A list contains metadata related to the gene clusters such as the number of gene clusters, their ID, and the number of genes contained in each of them.
#' @slot gene_cluster_annotations A list contains the result obtained from the GO enrichment analysis of gene clusters.
#' @slot cells_metadata A list containing metadata related to the cell clusters such as the clustering results the number of cell clusters, their order, colors associated to each cluster,...
#' @slot dbf_output A list containing the intermediates outputs of the DBF function : dknn, simulated distances, critical distance and fdr values.
#' @slot parameters A list containing the parameter used. Each element of the list correspond to a parameter.
#'
#' @return A ClusterSet object.
#' @export
#'
#' @examples
#' library(Seurat)
#' load_example_dataset("7871581/files/pbmc3k_medium")
#'
#' # Select informative genes
#' res <- select_genes(pbmc3k_medium)
#'
#' # Cluster informative features
#' res <- gene_clustering(res, inflation=1.6)
#' is(res)
#'
#' # Plot heatmap of gene clusters
#' plot_heatmap(res, row_labels = FALSE, line_size_horizontal = 2)
#' plot_heatmap(res[1,], row_labels = FALSE, line_size_horizontal = 2)
#' plot_heatmap(res[1:2, ], row_labels = FALSE, line_size_horizontal = 2)
#' plot_heatmap(res[1:2, 1:15], row_labels = FALSE, line_size_horizontal = 2)
#'
#' # plot the profiles
#' idents <- Seurat::Idents(pbmc3k_medium)
#' plot_profiles(res,
#' ident = idents)
#'
#' # Some methods of the ClusterSet object
#' x <- ncol(res)
#' x <- nrow(res)
#' x <- dim(res)
#' x <- col_names(res)
#' x <- row_names(res)
#' x <- get_genes(res)
#' x<- clust_size(res)
#' x <- c("IL32", "CCL5") %in% res
#' x <- which_clust(res, genes = c("IL32", "CCL5"))
#' res <- top_genes(res, top=5)
#' res <- res[2:3, ]
#' res <- rename_clust(res)
#' clust_names(res)
#' res <- res[, col_names(res)[1:10]]
#' show(res)
#' show_methods(res)
setClass(
"ClusterSet",
representation = list(
data = "matrix",
gene_clusters = "list",
top_genes = "list",
gene_clusters_metadata = "list",
gene_cluster_annotations = "list",
cells_metadata = "data.frame",
dbf_output = "list",
parameters = "list"
),
prototype = list(
data = matrix(nr = 0, nc = 0),
gene_clusters = list(),
top_genes = list(),
gene_clusters_metadata = list(),
gene_cluster_annotations = list(),
cells_metadata = data.frame(),
dbf_output = list(),
parameters = list()
)
)
#################################################################
## REDEFINE SHOW METHOD FOR CLASS OBJECT : ClusterSet
#################################################################
#' @title
#' The show method of a ClusterSet.
#' @description
#' The show method of a ClusterSet.
#' @param object A ClusterSet object.
#' @export show
#' @keywords internal
setMethod("show", signature("ClusterSet"),
function(object) {
cat("\t\tAn object of class ClusterSet\n")
cat("\t\tName:", slot(object, "parameters")$name, "\n")
cat("\t\tMemory used: ", object.size(object), "\n")
cat("\t\tNumber of cells: ", ncol(slot(object, "data")), "\n")
cat("\t\tNumber of informative genes: ",
nrow(slot(object, "data")), "\n")
cat(
"\t\tNumber of gene clusters: ",
slot(object, "gene_clusters_metadata")$number,
"\n"
)
cat("\t\tThis object contains the following informations:\n")
for (i in slotNames(object)) {
cat("\t\t\t - ", i, "\n")
}
if (length(slot(object, "parameters")) > 0) {
for (i in 1:length(slot(object, "parameters"))) {
cat("\t\t\t\t * ",
names(slot(object, "parameters"))[[i]],
" = ",
slot(object, "parameters")[[i]],
"\n")
}
}
})
################################################################################
## NCOL/NROW/DIM METHOD FOR CLASS OBJECT : ClusterSet
################################################################################
#' @title
#' ncol.ClusterSet
#' @description
#' The number of column of a ClusterSet object.
#' @param x The ClusterSet object
#' @keywords internal
ncol.ClusterSet <- function (x) {
ncol(x@data)
}
#' @title
#' nrow.ClusterSet
#' @description
#' The number of rows of a ClusterSet object.
#' @param x The ClusterSet object
#' @keywords internal
nrow.ClusterSet <- function (x) {
nrow(x@data)
}
#' @title Names of gene clusters stored in the ClusterSet object
#' @description
#' The names of the gene clusters stored in the ClusterSet object.
#' @param x The ClusterSet object.
#' @export clust_names
#' @keywords internal
setGeneric("clust_names",
function(x)
standardGeneric("clust_names"))
#' @title The names of the gene clusters stored in the ClusterSet object
#' @description
#' The names of the gene clusters stored in the ClusterSet object.
#' @param x The ClusterSet object
#' @export clust_names
#' @examples
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#' clust_names(pbmc3k_medium_clusters)
#'
setMethod("clust_names",
"ClusterSet",
function(x) {
return(names(x@gene_clusters))
})
#' @title Dimension of a ClusterSet object.
#' dim
#' @description
#' The dimension of a ClusterSet object.
#' @param x The ClusterSet object
#' @keywords internal
setMethod("dim", signature(x = "ClusterSet"),
function(x) {
dim(x@data)
})
#' @title Column names of an object
#' @description
#' The column names of a ClusterSet object.
#' @param x The ClusterSet object
#' @export col_names
#' @keywords internal
setGeneric(
name = "col_names",
def = function(x)
standardGeneric("col_names")
)
#' @title Column names of a ClusterSet object.
#' @description
#' The column names of a ClusterSet object.
#' @param x The ClusterSet object
#' @examples
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#' col_names(pbmc3k_medium_clusters)
#' @export col_names
setMethod(
f = "col_names",
signature = "ClusterSet",
definition = function(x)
colnames(x@data)
)
#' @title Row names of a ClusterSet object.
#' row_names
#' @description
#' The row names of a ClusterSet object.
#' @param x The ClusterSet object
#' @export row_names
#' @keywords internal
setGeneric("row_names",
function(x)
standardGeneric("row_names"))
#' @title Row names of a ClusterSet object.
#' row_names
#' @description
#' The row names of a ClusterSet object.
#' @param x The ClusterSet object
#' @examples
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#' row_names(pbmc3k_medium_clusters)
#' @export row_names
setMethod("row_names", "ClusterSet",
function(x)
rownames(x@data))
################################################################################
## Method for function"[". Subsetting
## ClusterSet object
################################################################################
#' @title Subsetting operator of a ClusterSet object
#' Extract
#' @description
#' The subsetting operator of a ClusterSet object.
#' The i axis correspond to clusters and j to column/cells
#' @param i indices specifying rows to extract. Indices are numeric or character vectors or empty (missing) or NULL.
#' @param j indices specifying column to extract. Indices are numeric or character vectors or empty (missing) or NULL.
#' @param ... See ?'['. Not functionnal here.
#' @param drop For matrices and arrays. If TRUE the result is coerced to the lowest possible dimension. Not functionnal here.
#' @keywords internal
setMethod("[", signature(x = "ClusterSet"),
function (x, i, j, ..., drop = FALSE) {
if (is.null(names(x@gene_clusters_metadata$cluster_id)))
names(x@gene_clusters_metadata$cluster_id) <-
names(x@gene_clusters)
if(is.null(names(x@gene_clusters_metadata$cluster_id)))
names(x@gene_clusters_metadata$cluster_id) <- names(x@gene_clusters)
if (missing(j)) {
if (missing(i)) {
n_data <- x@data
n_gene_clusters <- x@gene_clusters
n_top_genes <- x@top_genes
n_gene_clusters_metadata <- x@gene_clusters_metadata
n_gene_cluster_annotations <-
x@gene_cluster_annotations
n_cells_metadata <- x@cells_metadata
n_dbf_output <- x@dbf_output
} else {
n_data <- x@data[unlist(x@gene_clusters[i]), , drop = FALSE]
n_gene_clusters <- x@gene_clusters[i]
if (length(x@top_genes)) {
n_top_genes <- x@top_genes[i]
} else{
n_top_genes <- x@top_genes
}
n_gene_clusters_metadata <- x@gene_clusters_metadata
n_gene_clusters_metadata$cluster_id <-
x@gene_clusters_metadata$cluster_id[i]
n_gene_clusters_metadata$number <-
length(n_gene_clusters)
n_gene_clusters_metadata$size <-
x@gene_clusters_metadata$size[i]
if (length(x@gene_cluster_annotations) > 0) {
n_gene_cluster_annotations <- x@gene_cluster_annotations[i]
} else{
n_gene_cluster_annotations <- x@gene_cluster_annotations
}
n_cells_metadata <- x@cells_metadata
n_dbf_output <- x@dbf_output
n_dbf_output$center <-
n_dbf_output$center[i, , drop = FALSE]
}
} else {
if (missing(i)) {
n_data <- x@data[, j]
n_gene_clusters <- x@gene_clusters
n_top_genes <- x@top_genes
n_gene_clusters_metadata <- x@gene_clusters_metadata
n_gene_cluster_annotations <-
x@gene_cluster_annotations
n_cells_metadata <-
x@cells_metadata[j, , drop = FALSE]
n_dbf_output <- x@dbf_output
n_dbf_output$center <-
n_dbf_output$center[, j, drop = FALSE]
} else {
n_data <- x@data[unlist(x@gene_clusters[i]), j, drop = FALSE]
n_gene_clusters <- x@gene_clusters[i]
if (length(x@top_genes)) {
n_top_genes <- x@top_genes[i]
} else{
n_top_genes <- x@top_genes
}
n_gene_clusters_metadata <- x@gene_clusters_metadata
n_gene_clusters_metadata$cluster_id <-
x@gene_clusters_metadata$cluster_id[i]
n_gene_clusters_metadata$number <- length(i)
n_gene_clusters_metadata$size <-
x@gene_clusters_metadata$size[i]
if (length(x@gene_cluster_annotations) > 0) {
n_gene_cluster_annotations <- x@gene_cluster_annotations[i]
} else{
n_gene_cluster_annotations <- x@gene_cluster_annotations
}
n_cells_metadata <-
x@cells_metadata[j, , drop = FALSE]
n_dbf_output <- x@dbf_output
n_dbf_output$center <-
n_dbf_output$center[i, j, drop = FALSE]
}
}
cname <- clust_names(x)
csize <- clust_size(x)
new(
"ClusterSet",
data = n_data,
gene_clusters = n_gene_clusters,
top_genes = n_top_genes,
gene_clusters_metadata = n_gene_clusters_metadata,
gene_cluster_annotations = n_gene_cluster_annotations,
cells_metadata = n_cells_metadata,
dbf_output = n_dbf_output,
parameters = x@parameters
)
})
################################################################################
## Method nclust/clust_size for a
## ClusterSet object
################################################################################
#' @title Number of clusters in a ClusterSet object.
#' nclust
#' @description
#' The number of clusters in a ClusterSet object.
#' @param x The ClusterSet object
#' @export nclust
#' @examples
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#' n_clust <- nclust(pbmc3k_medium_clusters)
#' @keywords internal
setGeneric("nclust",
function(x)
standardGeneric("nclust"))
#' @title Number of clusters in a ClusterSet object.
#' nclust
#' @description
#' The number of clusters in a ClusterSet object.
#' @param x The ClusterSet object
#' @export nclust
#' @examples
#' # load a dataset
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#' n_clust <- nclust(pbmc3k_medium_clusters)
setMethod("nclust", signature("ClusterSet"),
function(x) {
length(x@gene_clusters)
})
#' @title Sizes of the clusters stored in a ClusterSet object
#' clust_size
#' @description
#' The sizes of the clusters stored in a ClusterSet object.
#' @param x A ClusterSet object.
#' @export clust_size
#' @examples
#' # load a dataset
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#' clust_size(pbmc3k_medium_clusters)
#' @keywords internal
setGeneric("clust_size",
function(x)
standardGeneric("clust_size"))
#' @title Sizes of the clusters stored in a ClusterSet object
#' clust_size
#' @description
#' The sizes of the clusters stored in a ClusterSet object.
#' @param x A ClusterSet object.
#' @export
#' @examples
#' # load a dataset
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#' clust_size(pbmc3k_medium_clusters)
setMethod("clust_size", signature("ClusterSet"),
function(x) {
x@gene_clusters_metadata$size
})
#################################################################
## Define gene_cluster function for ClusterSet object
#################################################################
#' @title The gene clusters stored in a ClusterSet.
#' gene_cluster
#' @description
#' Returns a named vector (gene as names) and cluster
#' as value.
#' @param object a ClusterSet object.
#' @param cluster The cluster of interest. 0 means all cluster. Otherwise a non-null integer value.
#' @param as_string Return cluster names as strings.
#' @examples
#' # load a dataset
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#' g_clust <- gene_cluster(pbmc3k_medium_clusters)
#' @export gene_cluster
#' @keywords internal
setGeneric("gene_cluster",
function(object,
cluster = 0,
as_string = FALSE)
standardGeneric("gene_cluster"))
#' @title The gene clusters stored in a ClusterSet.
#' gene_cluster
#' @description
#' Returns a named vector (gene as names) and cluster
#' as value.
#' @param object a ClusterSet object.
#' @param cluster The cluster of interest. 0 means all cluster. Otherwise a non-null integer value.
#' @param as_string Return cluster names as strings.
#' @examples
#' # load a dataset
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#' g_clust <- gene_cluster(pbmc3k_medium_clusters)
#' @export gene_cluster
setMethod("gene_cluster", signature("ClusterSet"),
function(object,
cluster = 0,
as_string = FALSE) {
if (!is.null(object@gene_clusters)) {
nb_clust <- length(object@gene_clusters)
} else{
print_msg("There is no cluster in this object.",
msg_type = 'STOP')
}
if (!is.numeric(cluster))
print_msg("Please provide a numeric value.",
msg_type = 'STOP')
cluster <- unique(cluster)
if (!all(cluster - floor(cluster) == 0) |
any(cluster < 0 | any(cluster > nb_clust)))
print_msg(
"Please provide a zero (all clusters) or positive integer in the required range.",
msg_type = 'STOP'
)
if (length(cluster) == 1) {
if (cluster == 0) {
cluster <- 1:length(object@gene_clusters)
}
}
if (length(cluster) > 1) {
if (length(cluster[cluster == 0]))
print_msg("Zero is out of range.",
msg_type = 'STOP')
}
if (nb_clust) {
if (!as_string) {
cluster_as_int <- unlist(mapply(
rep,
cluster,
lapply(object@gene_clusters[cluster], length),
SIMPLIFY = TRUE
))
cluster_as_int <-
as.vector(as.matrix(cluster_as_int))
names(cluster_as_int) <-
unlist(object@gene_clusters[cluster])
return(cluster_as_int)
} else{
cluster_as_str <- unlist(mapply(
rep,
clust_names(object)[cluster],
lapply(object@gene_clusters[cluster], length),
SIMPLIFY = TRUE
))
cluster_as_str <-
as.vector(as.matrix(cluster_as_str))
names(cluster_as_str) <-
unlist(object@gene_clusters[cluster])
return(cluster_as_str)
}
} else{
return(NULL)
}
})
################################################################################
## Method for function matching genes in a ClusterSet.
################################################################################
#' @title Match operator of a ClusterSet object
#' @description The match operator of a ClusterSet object
#' @param x The gene to be searched;
#' @param table The ClusterSet object.
#' @keywords internal
#' @export `%in%`
setMethod("%in%", signature(x = "character", table = "ClusterSet"), function(x, table) {
x %in% names(gene_cluster(table))
})
#' @title Which clusters contain a set of genes.
#' @description Which clusters contain a set of genes.
#' @param object a ClusterSet object.
#' @param genes The genes to be searched in the ClusterSet.
#' @export which_clust
#' @examples
#' # load a dataset
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#' hit <- which_clust(pbmc3k_medium_clusters, genes = c("TJP2", "GLA", "UNKNOWN"))
#' @keywords internal
setGeneric("which_clust",
function(object,
genes = NULL)
standardGeneric("which_clust"))
#' @title Which clusters contain a set of genes.
#' @description Which clusters contain a set of genes.
#' @param object a ClusterSet object.
#' @param genes The genes to be searched in the ClusterSet.
#' @export which_clust
#' @examples
#' # load a dataset
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#' hit <- which_clust(pbmc3k_medium_clusters, genes = c("TJP2", "GLA", "UNKNOWN"))
setMethod("which_clust",
signature("ClusterSet"),
function(object, genes) {
check_format_cluster_set(object)
gc <- gene_cluster(object)
tmp <- gc[which(names(gc) %in% genes)]
tmp[genes]
})
################################################################################
## Method for searching genes using REGEXP
################################################################################
#' @title Search gene module using a regular expression.
#' @description Search gene module using a regular expression.
#' @param object a ClusterSet object.
#' @param reg_exp The regular expression indicating the genes to be found.
#' @export grep_clust
#' @param as_list Whether to return the result as a list.
#' @examples
#' # load a dataset
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#' hit <- grep_clust(pbmc3k_medium_clusters, reg_exp="^T.*[0-9]$")
#' @keywords internal
setGeneric("grep_clust",
function(object,
reg_exp = NULL,
as_list=FALSE)
standardGeneric("grep_clust")
)
#' @title Search gene module using a regular expression.
#' @description Search gene module using a regular expression.
#' @param object a ClusterSet object.
#' @param reg_exp The regular expression indicating the genes to be found.
#' @param as_list Whether to return the result as a list.
#' @export grep_clust
#' @examples
#' # load a dataset
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#' hit <- grep_clust(pbmc3k_medium_clusters, reg_exp="^T.*[0-9]$")
setMethod("grep_clust",
signature("ClusterSet"),
function(object=NULL,
reg_exp=NULL,
as_list=FALSE) {
check_format_cluster_set(object)
grep_term <- function(x, y, val=TRUE){ grep(y, x, val=val, perl = TRUE)}
hits <- lapply(object@gene_clusters, grep_term, reg_exp)
if(as_list){
return(hits)
}else{
hits <- stack(hits)
hits <- setNames(hits$values, hits$ind)
return(hits)
}
})
################################################################################
## Method for renaming clusters from a clusterSet
################################################################################
#' @title Rename the gene clusters of a ClusterSet
#' @description Rename the gene clusters of a ClusterSet.
#' @param object a ClusterSet object.
#' @param new_names The new names for the clusters.
#' @export rename_clust
#' @examples
#' # load a dataset
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#' new_obj <- rename_clust(pbmc3k_medium_clusters, new_names = letters[1:nclust(pbmc3k_medium_clusters)])
#' @keywords internal
setGeneric("rename_clust",
function(object, new_names = NULL)
standardGeneric("rename_clust"))
#' @title Rename the gene clusters of a ClusterSet
#' @description Rename the gene clusters of a ClusterSet.
#' @param object a ClusterSet object.
#' @param new_names The new names for the clusters.
#' @export rename_clust
#' @examples
#' # load a dataset
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#' new_obj <- rename_clust(pbmc3k_medium_clusters, new_names = letters[1:nclust(pbmc3k_medium_clusters)])
setMethod("rename_clust",
signature("ClusterSet"),
function(object,
new_names = NULL) {
check_format_cluster_set(object)
if (is.null(new_names)) {
if (length(object@gene_clusters)) {
new_names <- 1:nclust(object)
} else{
new_names <- NULL
}
}
if (length(new_names) != nclust(object))
print_msg("The number of labels should be the same a the number of clusters.")
names(object@gene_clusters) <- new_names
if (length(object@top_genes) > 0)
names(object@top_genes) <- new_names
object@gene_clusters_metadata$cluster_id <- new_names
names(object@gene_clusters_metadata$size) <- new_names
if (length(object@gene_cluster_annotations) > 0)
names(object@gene_cluster_annotations) <- new_names
rownames(object@dbf_output$center) <- new_names
return(object)
})
################################################################################
## Method for writing gene list into an excel sheet.
################################################################################
#' @title Write Cluster-Set gene lists into an excel sheet.
#' @description Write gene lists from a Cluster-Set object into an excel sheet.
#' @param object The ClusterSet object.
#' @param file_path The file path.
#' @keywords internal
#' @export cluster_set_to_xls
setGeneric("cluster_set_to_xls",
function(object,
file_path = NULL)
standardGeneric("cluster_set_to_xls"))
#' @title Write Cluster-Set gene lists into an excel sheet.
#' @description Write gene lists from a Cluster-Set object into an excel sheet.
#' @param object The ClusterSet object.
#' @param file_path The file path.
#' @importFrom WriteXLS WriteXLS
#' @examples
#' #' Load an example dataset
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#'
#' # Write gene lists to file
#' tp_dir <- tempdir()
#' dir.create(tp_dir, showWarnings = FALSE)
#' cluster_set_to_xls(pbmc3k_medium_clusters, file.path(tp_dir, "test.xls"))
#' @export cluster_set_to_xls
setMethod("cluster_set_to_xls",
signature("ClusterSet"),
function(object,
file_path = NULL) {
check_format_cluster_set(object)
object <- reorder_genes(object)
dir_n <- dirname(file_path)
if (!dir.exists(dir_n))
print_msg("Directory does not exist. Exiting.", msg_type = "STOP")
if (file.exists(file_path))
print_msg("File already exist. Exiting.", msg_type = "STOP")
gnc <- gene_cluster(object)
df_list <- list(x=data.frame(All_modules = unname(gnc),
"official_gene_symbol" = names(gnc)))
tmp <- lapply(object@gene_clusters, as.data.frame)
for(i in 1:length(tmp)){
colnames(tmp[[i]]) <- paste0("Module ", i)
}
df_list <- append(df_list, tmp)
WriteXLS::WriteXLS(
x=df_list,
ExcelFileName = file_path,
SheetNames = c("All_modules", paste0("Module ", 1:length(object@gene_clusters)))
)
})
################################################################################
## Method for reordering clusters from a clusterSet
################################################################################
#' @title Reorder the clusters from a ClusterSet
#' @description Reorder the clusters from a ClusterSet based on their names
#' @param object a ClusterSet object.
#' @param new_order The names from the clusterSet in an alternative order.
#' @export reorder_clust
#' @examples
#' # load a dataset
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#' clust_size(pbmc3k_medium_clusters)
#' new_obj <- reorder_clust(pbmc3k_medium_clusters, new_order = 15:1)
#' clust_size(pbmc3k_medium_clusters)
#' @keywords internal
setGeneric("reorder_clust",
function(object, new_order = NULL)
standardGeneric("reorder_clust"))
#' @title Reorder the clusters from a ClusterSet
#' @description Reorder the clusters from a ClusterSet based on their names
#' @param object a ClusterSet object.
#' @param new_order The names from the clusterSet in an alternative order.
#' @export reorder_clust
#' @examples
#' # load a dataset
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#' clust_size(pbmc3k_medium_clusters)
#' new_obj <- reorder_clust(pbmc3k_medium_clusters, new_order = 15:1)
#' clust_size(new_obj)
setMethod("reorder_clust",
signature("ClusterSet"),
function(object,
new_order = NULL) {
check_format_cluster_set(object)
if (is.null(new_order)) {
print_msg('Please provide new_order argument.')
}
if (length(new_order) != nclust(object))
print_msg("The number of labels should be the same a the number of clusters.")
if (!all(sort(new_order) == sort(clust_names(object))))
print_msg("The labels should be the same in an alternative order.")
new_pos <- match(new_order, clust_names(object))
object@gene_clusters <- object@gene_clusters[new_pos]
if (length(object@top_genes) > 0)
object@top_genes <- object@top_genes[new_pos]
object@gene_clusters_metadata$cluster_id <-
object@gene_clusters_metadata$cluster_id[new_pos]
object@gene_clusters_metadata$size <-
object@gene_clusters_metadata$size[new_pos]
if (length(object@gene_cluster_annotations) > 0)
object@gene_cluster_annotations <-
object@gene_cluster_annotations[new_pos]
object@dbf_output$center <-
object@dbf_output$center[new_pos,]
return(object)
})
################################################################################
## Method for searching genes using REGEXP
################################################################################
#' @title Search genes within ClusterSet using a REGEXP.
#' @description Search genes within ClusterSet using a REGEXP.
#' @param object a ClusterSet object.
#' @param regexp The regular expression
#' @param val if FALSE, a vector containing the (integer) indices of the matches determined
#' by grep is returned, and if TRUE, a vector containing the matching elements themselves
#' is returned.
#' @export grep_clust
#' @examples
#' # load a dataset
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#' grep_clust(pbmc3k_medium_clusters, "[Kk][Rr][Tt]")
#' @keywords internal
setGeneric("grep_clust",
function(object,
regexp = NULL,
val = TRUE)
standardGeneric("grep_clust"))
#' @title Search genes within ClusterSet using a REGEXP.
#' @description Search genes within ClusterSet using a REGEXP.
#' @param object a ClusterSet object.
#' @param regexp The regular expression
#' @param val if FALSE, a vector containing the (integer) indices of the matches determined
#' by grep is returned, and if TRUE, a vector containing the matching elements themselves
#' is returned.
#' @export grep_clust
#' @examples
#' # load a dataset
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#' grep_clust(pbmc3k_medium_clusters, "^CD")
setMethod("grep_clust",
signature("ClusterSet"),
function(object,
regexp = NULL,
val = TRUE) {
check_format_cluster_set(object)
if (is.null(regexp)) {
print_msg('Please provide regexp argument.')
}
fgrep <-
function(x, regexp, val) {
grep(regexp, x, value = val)
}
lapply(object@gene_clusters, fgrep, regexp, val)
})
################################################################################
## Method for selecting a subset of column/cell for each identity
################################################################################
#' @title Given ncell, a target number, select ncell from each class of cell/column.
#' @description Given ncell, a target number, select ncell from each class of cell/column.
#' @param object a ClusterSet object.
#' @param ident A named vector. Names are cell/column names, values are classes/identity.
#' Typically the result of the Seurat::Ident() function.
#' @param nbcell The number of cell to select.
#' @param seed A seed for subsampling.
#' @export subsample_by_ident
#' @examples
#' # load a dataset
#' @keywords internal
setGeneric("subsample_by_ident",
function(object,
ident=NULL,
nbcell=TRUE,
seed=123)
standardGeneric("subsample_by_ident")
)
#' @title Given ncell, a target number, select ncell from each class of cell/column.
#' @description Given ncell, a target number, select ncell from each class of cell/column.
#' @param object a ClusterSet object.
#' @param ident A named vector. Names are cell/column names, values are classes/identity.
#' Typically the result of the Seurat::Ident() function.
#' @param nbcell The number of cell to select.
#' @param seed A seed for subsampling.
#' @export subsample_by_ident
#' @examples
#' # Set verbosity to 1 to display info messages only.
#' set_verbosity(1)
#'
#' # Load a dataset
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#' idents <- sample(1:10, size=ncol(pbmc3k_medium_clusters), rep=TRUE)
#' names(idents) <- col_names(pbmc3k_medium_clusters)
#' sub <- subsample_by_ident(pbmc3k_medium_clusters,
#' ident=idents,
#' nbcell=10)
setMethod("subsample_by_ident",
signature("ClusterSet"),
function(object,
ident=NULL,
nbcell=TRUE,
seed=123) {
check_format_cluster_set(object)
print_msg(paste0("Number of cell in the object: ", ncol(object)), msg_type = "DEBUG")
if(is.null(ident)){
print_msg('Please set the ident argument.')
}else{
name_idents <- names(ident)
if(is.null(name_idents)){
print_msg("The 'ident' argument needs a named vector.")
}
if(length(which(names(ident) == "")) != 0){
print_msg("The 'ident' argument needs a named vector or a named list of named vector.")
}
}
cell_ident <- split(names(ident), ident)
subsample <- function(x, y, seed){
if(length(x) < y){
print_msg("Not enough cells for sampling, returning max", msg_type = "DEBUG")
return(x)
}else{
set.seed(seed)
print_msg("Sampling requested number of cells", msg_type = "DEBUG")
return(sample(x, size = y, replace = FALSE))
}
}
cell_ident <- unlist(lapply(cell_ident, subsample, nbcell, seed))
print_msg(paste0("Number of cell left: ", length(cell_ident)), msg_type = "DEBUG")
object <- object[, cell_ident]
return(object)
})
################################################################################
## Method for printing gene clusters
################################################################################
#' @title Write the cluster to files.
#' @description Write the cluster to files.
#' @param object a ClusterSet object.
#' @param sep The separator
#' @param file_prefix A file prefix.
#' @param file_suffix A file suffix.
#' @param path A directory to store the files.
#' @param single_file Logical. Whether to write all clusters in a single file (one cluster / line). Need to change the default separator (e.g to ","). The file_prefix is used as file name.
#' @param write_cname Whether to add the cluster name. The cluster name is written as a prefix of each line in file(s) and followed by two pipes ("||").
#' @export write_clust
#' @examples
#' # load a dataset
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#' write_clust(pbmc3k_medium_clusters[1:3,], path="/tmp")
#' @keywords internal
setGeneric("write_clust",
function(object,
sep = "\n",
file_prefix="scigenex_clust",
file_suffix=".csv",
path=NULL,
single_file=FALSE,
write_cname=FALSE)
standardGeneric("write_clust")
)
#' @title Write the cluster to files.
#' @description Write the cluster to files.
#' @param object a ClusterSet object.
#' @param sep The separator
#' @param file_prefix A file prefix.
#' @param file_suffix A file suffix.
#' @param path A directory to store the files.
#' @param single_file Logical. Whether to write all clusters in a single file (one cluster / line). Need to change the default separator (e.g to ","). The file_prefix is used as file name.
#' @param write_cname Whether to add the cluster name. The cluster name is written as a prefix of each line in file(s) and followed by two pipes ("||").
#' @export write_clust
#' @examples
#' # load a dataset
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#' write_clust(pbmc3k_medium_clusters[1:3,], path="/tmp")
setMethod("write_clust",
signature("ClusterSet"),
function(object,
sep = ",",
file_prefix="scigenex_clust",
file_suffix=".csv",
path=NULL,
single_file=TRUE,
write_cname=TRUE) {
if(is.null(path)){
path <- getwd()
}else{
if(!dir.exists(path)){
print_msg(paste0("Creating a path for output: ",
path),
msg_type = "INFO")
dir.create(path, showWarnings = FALSE, recursive = TRUE)
}
}
check_format_cluster_set(object)
if(!single_file){
cat_fun <- function(x, sep=NULL, file=NULL, write_cname=FALSE, clust_names){
if(!write_cname){
cat(paste0(sort(x), collapse = sep), file=file)
}else{
cat(paste0(clust_names, "||", paste0(sort(x), collapse = sep), sep=""), file=file)
}
}
for(i in 1:length(object@gene_clusters)){
file_out <- paste0(file_prefix, "_", i, file_suffix)
cat_fun(object@gene_clusters[[i]],
sep=sep,
file=file.path(path, file_out),
write_cname=write_cname,
clust_names=names(object@gene_clusters)[i])
}
}else{
for(i in 1:length(object@gene_clusters)){
if(!write_cname){
cat(paste0(object@gene_clusters[[i]], collapse = sep),
file=file.path(path, paste0(file_prefix, file_suffix)),
append = TRUE,
sep="\n")
}else{
clust_names <- names(object@gene_clusters)[i]
cat(paste0(clust_names, "||", paste0(object@gene_clusters[[i]], collapse = sep)),
file=file.path(path, paste0(file_prefix, file_suffix)),
append = TRUE,
sep="\n")
}
}
}
})
#################################################################
## Define top_by_intersect function for a ClusterSet object
#################################################################
#' @title Select top_genes based on intersection with a list.
#' @description
#' The clusterSet object contains a top_genes slot that can be used to display
#' genes in heatmaps (see \code{plot_heatmap} function). Here the function select
#' top_genes based on intersection with a list.
#' @param object A \code{ClusterSet} object.
#' @param set A list to compare clusters to.
#' @param as_list Return a list of clusters not a ClusterSet object.
#' @return A \code{ClusterSet} object or a list (see as_list).
#' @export top_by_intersect
#' @keywords internal
#' @examples
#' # Set verbosity to 1 to display info messages only.
#' set_verbosity(1)
#'
#' # Load a dataset
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#'
#' set <- c('MS4A1', 'ISG20', 'CD3D', 'SEC14L5', 'RPL11', 'RPL32')
#' pbmc3k_medium_clusters <- top_by_intersect(pbmc3k_medium_clusters, set=set)
#' pbmc3k_medium_clusters@top_genes
setGeneric("top_by_intersect",
function(object,
set=NULL,
as_list=FALSE)
standardGeneric("top_by_intersect")
)
#################################################################
## Define top_by_intersect function for a ClusterSet object
#################################################################
#' @title Select top_genes based on intersection with a list.
#' @description
#' The clusterSet object contains a top_genes slot that can be used to display
#' genes in heatmaps (see \code{plot_heatmap} function). Here the function select
#' top_genes based on intersection with a list.
#' @param object A \code{ClusterSet} object.
#' @param set A list to compare clusters to.
#' @param as_list Return a list of clusters not a ClusterSet object.
#' @return A \code{ClusterSet} object or a list (see as_list).
#' @export top_by_intersect
#' @examples
#' # Set verbosity to 1 to display info messages only.
#' set_verbosity(1)
#'
#' # Load a dataset
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#'
#' set <- c('MS4A1', 'ISG20', 'CD3D', 'SEC14L5', 'RPL11', 'RPL32')
#' pbmc3k_medium_clusters <- top_by_intersect(pbmc3k_medium_clusters, set=set)
#' pbmc3k_medium_clusters@top_genes
setMethod("top_by_intersect",
signature("ClusterSet"),
function(object,
set=NULL,
as_list=FALSE) {
if(is.null(set))
print_msg("Please provide a set ('set' argument)",
msg_type = "STOP")
top_gn <- lapply(object@gene_clusters, intersect, set)
if(as_list){
return(top_gn)
}else{
object@top_genes <- top_gn
return(object)
}
})
#################################################################
## Define top_by_grep function for a ClusterSet object
#################################################################
#' @title Select top_genes based on a regular expression search
#' @description
#' The clusterSet object contains a top_genes slot that can be used to display
#' genes in heatmaps (see \code{plot_heatmap} function). Here the function select
#' top_genes based on a regular expression search
#' @param object A \code{ClusterSet} object.
#' @param regexp A regular expression
#' @param as_list Return a list of clusters not a ClusterSet object.
#' @return A \code{ClusterSet} object or a list (see as_list).
#' @export top_by_grep
#' @keywords internal
#' @examples
#' # Set verbosity to 1 to display info messages only.
#' set_verbosity(1)
#'
#' # Load a dataset
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#'
#' pbmc3k_medium_clusters <- top_by_grep(pbmc3k_medium_clusters, regexp="^CD")
#' pbmc3k_medium_clusters@top_genes
setGeneric("top_by_grep",
function(object,
regexp=NULL,
as_list=FALSE)
standardGeneric("top_by_grep")
)
#################################################################
## Define top_by_grep function for a ClusterSet object
#################################################################
#' @title Select top_genes based on a regular expression search
#' @description
#' The clusterSet object contains a top_genes slot that can be used to display
#' genes in heatmaps (see \code{plot_heatmap} function). Here the function select
#' top_genes based on a regular expression search
#' @param object A \code{ClusterSet} object.
#' @param regexp A regular expression
#' @param as_list Return a list of clusters not a ClusterSet object.
#' @return A \code{ClusterSet} object or a list (see as_list).
#' @export top_by_grep
#' @examples
#' # Set verbosity to 1 to display info messages only.
#' set_verbosity(1)
#'
#' # Load a dataset
#' load_example_dataset('7871581/files/pbmc3k_medium_clusters')
#'
#' pbmc3k_medium_clusters <- top_by_grep(pbmc3k_medium_clusters, regexp="^CD")
#' pbmc3k_medium_clusters@top_genes
setMethod("top_by_grep",
signature("ClusterSet"),
function(object,
regexp=NULL,
as_list=FALSE) {
if(is.null(regexp))
print_msg("Please provide a regexp ('regexp' argument)",
msg_type = "STOP")
fun_grep <- function(x, regexp, val=TRUE, perl=TRUE) grep(regexp, x, val=val, perl=perl)
top_gn <- lapply(object@gene_clusters, fun_grep, regexp, val=TRUE, perl=TRUE)
if(as_list){
return(top_gn)
}else{
object@top_genes <- top_gn
return(object)
}
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.