R/split_data.R

Defines functions splitTestClusters splitTrainClusters splitClusters

Documented in splitClusters splitTestClusters splitTrainClusters

# Functions used to split data, keeping only clusters of interest

#' Group clusters by cutting a hierarchy built from a symmetric AUROC matrix.
#'
#' The AUROC matrix is treated as a similarity matrix: it is converted to a
#' dissimilarity (1 - AUROC), clustered hierarchically with average linkage,
#' and the resulting tree is cut into \code{k} groups. Clusters whose column is
#' entirely NA are removed (from both rows and columns) before clustering.
#' Note that the cluster hierarchy corresponds exactly to the dendrogram shown
#' when using the plotHeatmap function.
#'
#' @param mn_scores A symmetric AUROC matrix as generated by MetaNeighborUS.
#' @param k The number of desired cluster sets.
#'
#' @return A list of cluster sets, each cluster set is a character vector
#'  containing cluster labels.
#'
#' @seealso \code{\link{plotHeatmap}}
#'
#' @export
splitClusters <- function(mn_scores, k) {
    # Flag clusters whose whole column is missing; the same mask is applied to
    # rows and columns so the matrix stays square.
    all_missing <- apply(is.na(mn_scores), 2, all)
    keep <- !all_missing
    scores <- mn_scores[keep, keep]

    # Similarity -> dissimilarity, then average-linkage hierarchical clustering.
    dissimilarity <- stats::as.dist(1 - scores)
    tree <- stats::hclust(dissimilarity, method = "average")

    # Cut the tree into k groups and collect the labels of each group.
    membership <- stats::cutree(tree, k = k)
    split(names(membership), membership)
}


#' Split train clusters according to AUROC similarity to test clusters.
#'
#' This function computes hierarchical clustering to group similar train
#' clusters, using similarity to test clusters as features, then uses a standard
#' tree cutting algorithm to obtain groups of similar clusters. Rows and
#' columns that contain only NA values are removed before clustering. Note that
#' the cluster hierarchy corresponds exactly to the column dendrogram shown when
#' using the plotHeatmapPretrained function.
#'
#' @param mn_scores An AUROC matrix as generated by MetaNeighborUS, usually with
#' the "trained_model" option. The columns are the clusters being grouped.
#' @param k The number of desired cluster sets.
#'
#' @return A list of cluster sets, each cluster set is a character vector
#' containing cluster labels (column names of \code{mn_scores}).
#'
#' @seealso \code{\link{plotHeatmapPretrained}}
#'
#' @export
splitTrainClusters <- function(mn_scores, k) {
    row_is_na <- apply(mn_scores, 1, function(x) { all(is.na(x)) })
    col_is_na <- apply(mn_scores, 2, function(x) { all(is.na(x)) })
    # drop = FALSE keeps the result a matrix even when a single row or column
    # survives the NA filter. Without it, a single remaining row collapses to a
    # plain vector, t() then yields a 1 x n matrix and the columns can no longer
    # be clustered.
    mn_scores <- mn_scores[!row_is_na, !col_is_na, drop = FALSE]
    # Columns become observations: Euclidean distance between column profiles,
    # followed by average-linkage hierarchical clustering.
    mn_hclust <- stats::hclust(stats::dist(t(mn_scores)), method = "average")
    result <- stats::cutree(mn_hclust, k = k)
    split(names(result), result)
}

#' Group test clusters by their AUROC similarity profile across train clusters.
#'
#' Test clusters are grouped by hierarchical clustering, using their similarity
#' to train clusters as features, and the resulting tree is cut with a standard
#' tree cutting algorithm to obtain groups of similar clusters. This is done by
#' transposing the AUROC matrix and delegating to
#' \code{\link{splitTrainClusters}}.
#' Note that the cluster hierarchy does *not* correspond to the row ordering of
#' the plotHeatmapPretrained function, which uses a different heuristic.
#'
#' @param mn_scores An AUROC matrix as generated by MetaNeighborUS, usually with
#' the "trained_model" option.
#' @param k The number of desired cluster sets.
#'
#' @return A list of cluster sets, each cluster set is a character vector
#' containing cluster labels.
#'
#' @seealso \code{\link{plotHeatmapPretrained}}
#'
#' @export
splitTestClusters <- function(mn_scores, k) {
    # Swapping rows and columns lets the train-cluster routine operate on the
    # rows of the original matrix.
    transposed_scores <- t(mn_scores)
    splitTrainClusters(transposed_scores, k)
}

Try the MetaNeighbor package in your browser

Any scripts or data that you put into this service are public.

MetaNeighbor documentation built on Nov. 8, 2020, 5:40 p.m.