# Methods wrapped to be used in a uniformized way
#' Subtype patients with Similarity Network Fusion (SNF)
#'
#' For each data matrix, pairwise distances between samples are computed with
#' \code{\link[SNFtool]{dist2}} and turned into an affinity matrix with
#' \code{\link[SNFtool]{affinityMatrix}}. The affinity matrices are then fused
#' with \code{\link[SNFtool]{SNF}} and, unless `just_fuse` is `TRUE`, the fused
#' matrix is partitioned with \code{\link[SNFtool]{spectralClustering}}.
#'
#' @param data_list a list of data matrices with continuous data of format
#' samples x features (with the same number of samples).
#'
#' @param minimal_return logical, kept so that all `subtype_*` wrappers share
#' the same interface. It is not read by this function: the returned list
#' already only holds the partition and the element for internal metrics.
#'
#' @param cluster_number The supposed or previously inferred number of
#' clusters. Only used for the spectral clustering step, i.e. when `just_fuse`
#' is `FALSE`.
#'
#' @param K Number of neighbors in the K-nearest neighbors part of the fusion
#' algorithm and for the computation of the affinity matrices (the same value
#' is used for both steps).
#'
#' @param alpha Variance for the local model (for the Gaussian kernel of the
#' affinity matrix). Recommended values are between 0.3 and 0.8.
#'
#' @param t Number of iterations for the diffusion process.
#'
#' @param spectral_clust_type The type of spectral clustering, see
#' \code{\link[SNFtool]{spectralClustering}} for more information.
#'
#' @param just_fuse logical, whether to only integrate the matrices and return
#' the fused matrix, skipping the clustering step.
#'
#' @return If `just_fuse` is `TRUE`, the fused affinity matrix returned by
#' \code{\link[SNFtool]{SNF}}. Otherwise a result list containing:
#' * $partition: The predicted partition.
#' * $element_for_metric: The name of the element in the result list
#' containing the data to be used with internal metrics
#' (here "affinity_fused").
#' * $affinity_fused: The fused affinity matrix returned by the function
#' \code{\link[SNFtool]{SNF}}.
#'
#' @seealso \code{\link[SNFtool]{SNF}},
#' \code{\link[SNFtool]{spectralClustering}},
#' \code{\link[SNFtool]{affinityMatrix}}.
#'
#' @export
subtype_snf <- function(data_list,
                        minimal_return = FALSE,
                        cluster_number,
                        K = 20,
                        alpha = 0.5,
                        t = 20,
                        spectral_clust_type = 3,
                        just_fuse = FALSE) {
  # Step 1: one sample-by-sample distance matrix per data type.
  # NOTE(review): recent SNFtool documentation describes dist2() as returning
  # squared Euclidean distances and takes their square root before calling
  # affinityMatrix(); confirm that passing them unrooted is intended.
  distance_list <- lapply(data_list, function(x) {
    SNFtool::dist2(as.matrix(x), as.matrix(x))
  })

  # Step 2: turn every distance matrix into an affinity matrix.
  affinity_list <- lapply(distance_list, function(d) {
    SNFtool::affinityMatrix(diff = d, K = K, sigma = alpha)
  })

  # Step 3: fuse the per-data-type affinities into a single network.
  affinity_fused <- SNFtool::SNF(Wall = affinity_list, K = K, t = t)

  if (just_fuse) {
    # Fusion only: cluster_number is never evaluated on this path.
    affinity_fused
  } else {
    # Step 4: spectral clustering on the fused network.
    list(
      partition = SNFtool::spectralClustering(
        affinity = affinity_fused,
        K = cluster_number,
        type = spectral_clust_type
      ),
      element_for_metric = "affinity_fused",
      affinity_fused = affinity_fused
    )
  }
}
#' Subtypes using Affinity Network Fusion
#'
#' Builds one affinity matrix per data type with
#' \code{\link[ANF]{affinity_matrix}} (from the Euclidean distances between
#' samples), fuses them with \code{\link[ANF]{ANF}} and, unless `just_fuse` is
#' `TRUE`, partitions the fused matrix with
#' \code{\link[ANF]{spectral_clustering}}.
#'
#' @param data_list a list of data matrices with continuous data of format
#' samples x features (with the same number of samples).
#'
#' @param minimal_return logical, kept so that all `subtype_*` wrappers share
#' the same interface. It is not read by this function: the returned list
#' already only holds the partition and the element for internal metrics.
#'
#' @param cluster_number The supposed or previously inferred number of
#' clusters. Only used for the spectral clustering step, i.e. when `just_fuse`
#' is `FALSE`.
#'
#' @param k_affi The number of k-nearest neighbors for the affinity matrix
#' calculation. It has no default value and must be supplied.
#'
#' @param alpha_affi Coefficient for local diameters for affinity matrix
#' calculation. Default value: 1/6.
#'
#' @param beta_affi Coefficient for pair-wise distance for affinity matrix
#' calculation. Default value: 1/6.
#'
#' @param k_fusion the number of k nearest neighbors for function kNN_graph
#'
#' @param weigth_fusion a list of non-negative real numbers (which will be
#' normalized internally so that it sums to 1) that one-to-one correspond to
#' the affinity matrices computed from the features matrices provided in
#' `data_list`. If not set, internally uniform weights are assigned to all
#' the affinity matrices.
#'
#' @param type_fusion choose one of the two options "two-step" random walk
#' (default) or "one-step" random walk (for the fusion process).
#'
#' @param alpha_fusion a list of eight non-negative real numbers (which will be
#' normalized internally to make it sums to 1). Only used when "two-step"
#' (default value of `type_fusion`) random walk is used. `alpha_fusion` is the
#' weights for eight terms in the "two-step" random walk formula (check
#' research paper for more explanations about the terms).
#' Default value: (1, 1, 0, 0, 0, 0, 0, 0), i.e., only use the first two terms
#' (since they are most effective in practice).
#'
#' @param spectral_type choose one of three versions of graph Laplacian:
#' * "rw" (default choice): normalization closely related to random walk
#' (`L = I - D^(-1) * W`);
#' * "sym": normalized symmetric matrix
#' (`L = I - D^(-0.5) * W * D^(-0.5)`);
#' * "unnormalized": unnormalized graph Laplacian matrix (`L = D - W`).
#'
#' For more information:
#' \url{https://www.cs.cmu.edu/~aarti/Class/10701/readings/Luxburg06_TR.pdf}
#'
#' @param verbose_fusion logical(1); if true, print some information concerning
#' the fusion step.
#'
#' @param just_fuse logical, whether to only integrate the matrices and return
#' the fused matrix, skipping the clustering step.
#'
#' @return If `just_fuse` is `TRUE`, the fused affinity matrix returned by
#' \code{\link[ANF]{ANF}}. Otherwise a result list containing:
#' * $partition: The predicted partition.
#' * $element_for_metric: The name of the element in the result list
#' containing the data to be used with internal metrics
#' (here "affinity_fused").
#' * $affinity_fused: The fused affinity matrix returned by the function
#' \code{\link[ANF]{ANF}}
#'
#' @seealso \code{\link[ANF]{ANF}}, \code{\link[ANF]{affinity_matrix}},
#' \code{\link[ANF]{spectral_clustering}}.
#'
#' @export
#'
subtype_anf <- function(data_list,
                        minimal_return = FALSE,
                        cluster_number,
                        k_affi,
                        alpha_affi = 1 / 6,
                        beta_affi = 1 / 6,
                        k_fusion = 20,
                        weigth_fusion = NULL,
                        type_fusion = c("two-step", "one-step"),
                        alpha_fusion = c(1, 1, 0, 0, 0, 0, 0, 0),
                        spectral_type = c("rw", "sym", "unnormalized"),
                        verbose_fusion = FALSE,
                        just_fuse = FALSE) {
  # Resolve the enumerated choices; the first element of each vector is the
  # default ("two-step" and "rw").
  type_fusion <- match.arg(type_fusion)
  spectral_type <- match.arg(spectral_type)

  # One affinity matrix per data type, built from Euclidean distances between
  # samples. stats::dist is namespace-qualified so that a masking `dist`
  # (e.g. from an attached package) cannot be picked up by mistake.
  affinity_list <- lapply(data_list, function(data) {
    ANF::affinity_matrix(
      D = as.matrix(stats::dist(data)),
      k = k_affi,
      alpha = alpha_affi,
      beta = beta_affi
    )
  })

  affinity_fused <- ANF::ANF(
    Wall = affinity_list, K = k_fusion, weight = weigth_fusion,
    type = type_fusion, alpha = alpha_fusion, verbose = verbose_fusion
  )

  # Fusion only: cluster_number is never evaluated on this path.
  if (just_fuse) {
    return(affinity_fused)
  }

  partition <- ANF::spectral_clustering(
    A = affinity_fused,
    k = cluster_number,
    type = spectral_type
  )

  list(
    partition = partition,
    element_for_metric = "affinity_fused",
    affinity_fused = affinity_fused
  )
}
# Parameters of PINSPlus::PerturbationClustering that can be set through the
# `...` argument of subtype_pins():
# * `ncore` Number of cores that the algorithm should use.
# Default value is 2.
#
# * `clusteringMethod` The name of built-in clustering algorithm that
# PerturbationClustering will use. Currently supported algorithm are kmeans,
# pam and hclust. Default value is "kmeans".
#
# * `clusteringFunction` The clustering algorithm function that will be used
# instead of built-in algorithms.
# * `clusteringOptions` A list of parameters that will be passed to the
# clustering algorithm in clusteringMethod.
# * `perturbMethod` The name of built-in perturbation method that
# PerturbationClustering will use, currently supported methods are noise and
# subsampling. Default value is "noise".
# * `perturbFunction` The perturbation method function that will be used
# instead of built-in ones.
# * `perturbOptions` A list of parameters that will be passed to the
# perturbation method in perturbMethod.
# * `iterMin` The minimum number of iterations. Default value is 20.
# * `iterMax` The maximum number of iterations. Default value is 200.
# * `madMin` The minimum of Mean Absolute Deviation of AUC of Connectivity
# matrix for each k. Default value is 1e-03.
#' Subtypes using PINSPlus package
#'
#' Perform subtyping using multiple types of data
#'
#' subtype_pins uses \code{\link[PINSPlus]{SubtypingOmicsData}}. The input is a
#' list of data matrices where each matrix represents the molecular
#' measurements of a data type. The input matrices must have the same number
#' of rows. The function aims to find the optimum number of subtypes
#' and location of each sample in the clusters from integrated input data
#' `data_list` through two processing stages:
#'
#' * Stage I: The algorithm first partitions each data type using the function
#' PerturbationClustering. It then merges the connectivities across data
#' types into similarity matrices. Both kmeans and similarity-based
#' clustering algorithms - partitioning around medoids (pam) - are used to
#' partition the built similarity. The algorithm returns the partitioning
#' that agrees the most with individual data types.
#'
#' * Stage II: The algorithm attempts to split each discovered group if there
#' is a strong agreement between data types, or if the subtyping in Stage I
#' is very unbalanced.
#'
#' @param data_list a list of data matrices with continuous data of format
#' samples x features (with the same number of samples).
#'
#' @param minimal_return logical, if TRUE, the result of the function will just
#' be what's needed to evaluate the goodness of the partition, i.e. the
#' partition and the element for internal metrics; the per-data-type results
#' are then left out.
#'
#' @param return_stage_2 logical, if TRUE (default) the returned partition is
#' the one of stage 2 of the PINS workflow (`cluster2`), otherwise the one of
#' stage 1 (`cluster1`).
#' @param k_max The maximum number of clusters tested (from 2 to k_max).
#' Default value is 5.
#' @param agreement_cutoff Agreement threshold to be considered consistent.
#' Default value is 0.5.
#' @param verbose Set it to TRUE or FALSE to get more or less details
#' respectively.
#' @param ... these arguments will be passed to PerturbationClustering
#' algorithm. See \code{\link[PINSPlus]{PerturbationClustering}}.
#'
#' @return a result list containing:
#' * $partition: The predicted partition
#' * $element_for_metric: Always `NULL` for this method: the result holds no
#' element to be used with internal metrics.
#' * $data_type_result: (only when `minimal_return` is `FALSE`) A list of
#' results for individual data type. Each element of the list is the result of
#' \code{\link[PINSPlus]{PerturbationClustering}}
#' for the corresponding data matrix provided in `data_list`.
#'
#' @export
#'
#' @seealso \code{\link[PINSPlus]{PerturbationClustering}},
#' \code{\link[PINSPlus]{SubtypingOmicsData}}.
#'
subtype_pins <- function(data_list,
                         minimal_return = FALSE,
                         return_stage_2 = TRUE,
                         k_max = 5,
                         agreement_cutoff = 0.5,
                         verbose = TRUE, ...) {
  result <- PINSPlus::SubtypingOmicsData(
    dataList = data_list,
    kMax = k_max,
    agreementCutoff = agreement_cutoff,
    verbose = verbose,
    ...
  )

  # Pick the stage whose partition is reported.
  partition <- if (return_stage_2) result$cluster2 else result$cluster1

  # list() keeps the NULL-valued element_for_metric entry, so every subtype_*
  # result exposes the same element names.
  output <- list(partition = partition, element_for_metric = NULL)

  if (!minimal_return) {
    # c() (rather than `$<-`) so the element is kept even when it is NULL.
    output <- c(output, list(data_type_result = result$dataTypeResult))
  }

  output
}
# Clustering callback referenced by name in consensus_spectral_clustering().
# Takes a (sub)sampled object `this_dist` and a number of clusters `k`,
# converts `this_dist` to a plain matrix and hands it to
# SNFtool::spectralClustering() as the affinity matrix; returns whatever
# spectralClustering() returns.
spectral_clustering_for_cc <- function(this_dist, k) {
  affinity <- as.matrix(this_dist)
  SNFtool::spectralClustering(affinity = affinity, K = k)
}
#' Apply consensus clustering on affinity matrix.
#'
#' Use spectral clustering as the clustering function of consensus clustering on
#' the provided affinity matrix.
#'
#' It uses \code{\link[ConsensusClusterPlus]{ConsensusClusterPlus}}.
#' ConsensusClusterPlus implements the Consensus Clustering algorithm of Monti,
#' et al (2003). The function subsamples the affinity matrix according
#' to pItem, pFeature, weightsItem, and weightsFeature, and clusters the data
#' into 2 to `cluster_number_max` clusters using spectral clustering.
#'
#' It will also compute the item consensus results using
#' \code{\link[ConsensusClusterPlus]{calcICL}}. For more information, see the
#' documentation of the original package.
#'
#' The affinity matrix is converted with \code{\link[stats]{as.dist}} before
#' being handed to ConsensusClusterPlus, and the clustering function converts
#' each subsample back with `as.matrix()` before spectral clustering.
#'
#' @param affinity_matrix affinity matrix (e.g. produced by snf or anf)
#' @param cluster_number_max integer. The maximum cluster number to evaluate.
#' @param reps Number of subsamples evaluated.
#' @param pItem numeric, proportion of items to sample.
#' @param pFeature numeric, proportion of features to sample.
#' @param title character for output directory. Directory is created only
#' if plot is not NULL or writeTable is TRUE. This title can be an absolute
#' or relative path.
#' @param innerLinkage hierarchical linkage method for subsampling.
#' @param finalLinkage hierarchical linkage method for consensus matrix.
#' @param distance character value, forwarded as the `distance` argument of
#' \code{\link[ConsensusClusterPlus]{ConsensusClusterPlus}}. Since the data
#' are supplied as an already computed `dist` object, ConsensusClusterPlus is
#' not expected to compute distances itself (to be confirmed against its
#' documentation).
#' @param ml optional. prior result, if supplied then only do graphics and
#' tables.
#' @param tmyPal optional character vector of colors for consensus matrix
#' @param seed optional numerical value. sets random seed for reproducible
#' results.
#' @param plot character value. NULL - print to screen, 'pdf', 'png', 'pngBMP'
#' for bitmap png, helpful for large datasets.
#' @param writeTable logical value. TRUE - write output and log to csv.
#' @param weightsItem optional numerical vector. weights to be used for sampling
#' items.
#' @param weightsFeature optional numerical vector. weights to be used for
#' sampling features.
#' @param verbose boolean. If TRUE, print messages to the screen to indicate
#' progress. This is useful for large datasets.
#' @param corUse optional character value. specifies how to handle missing data
#' in correlation distances 'everything','pairwise.complete.obs',
#' 'complete.obs' see cor() for description.
#'
#' @return a list with two elements:
#' * $cc_result: the result of
#' \code{\link[ConsensusClusterPlus]{ConsensusClusterPlus}}.
#' * $icl: the result of \code{\link[ConsensusClusterPlus]{calcICL}}.
#' @export
#'
consensus_spectral_clustering <- function(
    affinity_matrix,
    cluster_number_max = 5,
    reps = 10,
    pItem = 0.8, pFeature = 1,
    title = "Consensus spectral clustering",
    innerLinkage = "average",
    finalLinkage = "average",
    distance = "pearson",
    ml = NULL,
    tmyPal = NULL,
    seed = NULL,
    plot = NULL,
    writeTable = FALSE,
    weightsItem = NULL, weightsFeature = NULL,
    verbose = FALSE, corUse = "everything") {
  ## Preparation
  # stats::as.dist is namespace-qualified so that a masking `as.dist`
  # (e.g. from an attached package) cannot be picked up by mistake.
  d <- stats::as.dist(affinity_matrix)

  ## Main
  # NOTE(review): the clustering function is given by name, so
  # ConsensusClusterPlus has to be able to resolve
  # "spectral_clustering_for_cc" at run time -- confirm the helper is visible
  # from there (e.g. exported or attached).
  cc <- ConsensusClusterPlus::ConsensusClusterPlus(
    d = d,
    maxK = cluster_number_max,
    reps = reps, pItem = pItem, pFeature = pFeature,
    clusterAlg = "spectral_clustering_for_cc", title = title,
    innerLinkage = innerLinkage, finalLinkage = finalLinkage,
    distance = distance,
    ml = ml, tmyPal = tmyPal, seed = seed, plot = plot,
    writeTable = writeTable, weightsItem = weightsItem,
    weightsFeature = weightsFeature, verbose = verbose, corUse = corUse
  )
  icl <- ConsensusClusterPlus::calcICL(
    res = cc, title = title,
    plot = plot, writeTable = writeTable
  )

  ## Return
  list(
    cc_result = cc,
    icl = icl
  )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.