R/omada.R

Defines functions omada

Documented in omada

#' A wrapper function that utilizes all tools to produce the optimal
#' sample memberships
#'
#' @param data A dataframe, where columns are features and rows are data points
#' @param method.upper.k The upper limit of clusters, k, to be considered.
#' Must be more than 2
#'
#' @return An object of class "clusterAnalysis" containing
#' partition.agreement.scores,partition.agreement.plot,feature.selection.scores,
#' feature.selection.plot, feature.selection.optimal.features,
#' feature.selection.optimal.number.of.features, cluster.voting.scores,
#' cluster.voting.cluster.memberships,cluster.voting.metric.votes,
#' cluster.voting.k.votes,cluster.voting.plot,sample.memberships,
#' signature.feature.coefs and signature.feature.plot
#'
#' @export
#'
#' @examples
#' omada(toy_genes, method.upper.k = 3)

omada <- function(data, method.upper.k = 5) {

    rnames <- row.names(data)

    if(method.upper.k <= 2) {
        method.upper.k <- 3
    }

    # Running method selection with a default comparison number of 3
    methods.results <-
        clusteringMethodSelection(data, method.upper.k = method.upper.k,
                                  number.of.comparisons = 3)

    pa.df <- get_partition_agreement_scores(methods.results)
    pa.plot <- plot_partition_agreement(methods.results)

    optimal.method <- names(which.max(colMeans(pa.df[seq_len(3)]))) # Selected method

    # Running feature selection so that we consider 5 steps in total
    step <- dim(data)[2]/5
    feature.results <- featureSelection(data, min.k = 2,
                                        max.k = method.upper.k, step = step)

    fs.df <- get_average_feature_k_stabilities(feature.results)
    fs.plot <- plot_average_stabilities(feature.results) # stability line plot

    # Selected  features
    optimal.features <- get_optimal_features(feature.results)
    optimal.number.of.features <-
        get_optimal_number_of_features(feature.results)

    # Selected dataset
    data <- data[,optimal.features]
    row.names(data) <- rnames

    # Running the voting for k
    if (optimal.method == "spectral") {
        optimal.method.abbr <- "sc"
    } else if (optimal.method == "hierarchical") {
        optimal.method.abbr <- "hc"
    } else {
        optimal.method.abbr <- "km"
    }

    cluster.voting.results <- clusterVoting(data, 2, method.upper.k,
                                            optimal.method.abbr)

    cv.scores <- get_internal_metric_scores(cluster.voting.results)
    cv.clusters <- get_cluster_memberships_k(cluster.voting.results)
    cv.votes <- get_metric_votes_k(cluster.voting.results)
    cv.ensemble <- get_vote_frequencies_k(cluster.voting.results)
    cv.plot <- plot_vote_frequencies(cluster.voting.results)

    optimal.k <-
        as.numeric(substring(
            cv.ensemble[which.max(cv.ensemble$Frequency),][[1]], 2))

    optimal.clustering <- optimalClustering(data, optimal.k, optimal.method)

    # Generate memberships
    memberships <- get_optimal_memberships(optimal.clustering)
    optimal.stability.score <- get_optimal_stability_score(optimal.clustering)
    optimal.parameter.used <- get_optimal_parameter_used(optimal.clustering)

    # Generate gene signatures
    # data$id <- rownames(data)
    gene.signature.results <- geneSignatures(data, memberships)
    gs.matrix <- get_coefficient_dataset(gene.signature.results)
    gs.plot <- plot_top30percent_coefficients(gene.signature.results)

    clusterAnalysis <- function(partition.agreement.scores=pa.df,
                                partition.agreement.plot=pa.plot,
                                feature.selection.scores=fs.df,
                                feature.selection.plot=fs.plot,
                                feature.selection.optimal.features=
                                    optimal.features,
                                feature.selection.optimal.number.of.features=
                                    optimal.number.of.features,
                                cluster.voting.scores=cv.scores,
                                cluster.voting.memberships=cv.clusters,
                                cluster.voting.metric.votes=cv.votes,
                                cluster.voting.k.votes=cv.ensemble,
                                cluster.voting.plot=cv.plot,
                                sample.memberships=memberships,
                                signature.feature.coefs=gs.matrix,
                                signature.feature.plot=gs.plot){

        ca <- list(partition.agreement.scores = partition.agreement.scores,
                   partition.agreement.plot = partition.agreement.plot,
                   feature.selection.scores = feature.selection.scores,
                   feature.selection.plot = feature.selection.plot,
                   feature.selection.optimal.features=
                       feature.selection.optimal.features,
                   feature.selection.optimal.number.of.features=
                       feature.selection.optimal.number.of.features,
                   cluster.voting.scores = cluster.voting.scores,
                   cluster.voting.memberships = cluster.voting.memberships,
                   cluster.voting.metric.votes = cluster.voting.metric.votes,
                   cluster.voting.k.votes = cluster.voting.k.votes,
                   cluster.voting.plot = cluster.voting.plot,
                   sample.memberships = sample.memberships,
                   signature.feature.coefs = signature.feature.coefs,
                   signature.feature.plot = signature.feature.plot
                   )

        ## Set the name for the class
        class(ca) <- "clusterAnalysis"

        return(ca)
    }

    cluster.analysis <- clusterAnalysis()

    return(cluster.analysis)
}
BioSok/omada documentation built on Aug. 21, 2023, 2:38 p.m.