# R/99_Getters.R

#' Get \code{Benchmark} co-ranking matrix
#'
#' Extracts the co-ranking matrix, its collapsed version and the neighbourhood size used, for assessing the quality of a projection.
#' 
#' @param benchmark object of type \code{Benchmark}
#' @param idx.subpipeline integer value: index of subpipeline that includes a projection step
#' @param idx.n_param optional integer value: index of subpipeline n-parameter iteration. Default value is \code{NULL}
#'
#' @seealso
#'
#' * **\code{Benchmark}**: constructs a \code{Benchmark} object (benchmark pipeline set-up)
#'
#' * **\code{Evaluate}**: runs all benchmark sub-pipelines and scores the performance of each tool
#'
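#' @examples
#' # A minimal usage sketch; `bm` is assumed to be a previously evaluated
#' # Benchmark object whose first sub-pipeline includes a projection step.
#' \dontrun{
#' cr <- GetCoRanking(bm, idx.subpipeline = 1, idx.n_param = 1)
#' str(cr)  # full co-ranking matrix, collapsed matrix and neighbourhood size K
#' }
#'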
#' @export
GetCoRanking <- function(
  benchmark,
  idx.subpipeline,
  idx.n_param = NULL
) {
  list(
    Matrix = .h5read(benchmark$h5_path, .h5_slotname(idx.subpipeline, 'Projection', idx.n_param, suffix = 'Scoring/CoRankingMatrix')),
    Collapsed = .h5read(benchmark$h5_path, .h5_slotname(idx.subpipeline, 'Projection', idx.n_param, suffix = 'Scoring/Collapsed')),
    K = .h5read(benchmark$h5_path, .h5_slotname(idx.subpipeline, 'Projection', idx.n_param, suffix = 'Scoring/ProjectionNeighbourhood'))
  )
}

#' Get \code{Benchmark} penalty scoring matrix
#'
#' Extracts the penalty scoring matrix, which defines a hierarchy penalty model, from a \code{Benchmark}-type object.
#' 
#' @param benchmark object of type \code{Benchmark} that uses hierarchical penalties
#'
#' @seealso
#' 
#' * **\code{CreatePenaltyScoringMatrix}**: creates a penalty scoring matrix for a given set of manually annotated populations
#'
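#' @examples
#' # A minimal usage sketch; `bm` is assumed to be a Benchmark object set up
#' # with hierarchical penalties.
#' \dontrun{
#' pen <- GetPenaltyScoringMatrix(bm)
#' str(pen)  # pairwise penalties between manually annotated populations
#' }
#'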
#' @export
GetPenaltyScoringMatrix <- function(
  benchmark
) {
  .h5readNamedMatrix(benchmark, '/Input/Annotation/ScoringMatrix')
}

#' Get \code{Benchmark} manual annotation scoring
#'
#' Extracts values of unsupervised evaluation metrics applied to manual annotation of benchmark input data.
#' 
#' @param benchmark object of type \code{Benchmark}
#' @param suffix optional string (advanced): HDF5 slot name suffix
#'
#' @seealso
#' 
#' * **\code{Benchmark}**: constructs a \code{Benchmark} object (benchmark pipeline set-up)
#'
#' @export
GetAnnotationScoring <- function(
  benchmark, suffix = ''
) {
  .h5readAnnotationScoring(benchmark, suffix)
}

#' Get \code{Benchmark} expression data
#'
#' Extracts expression matrix (or list of expression matrices) associated with inputs to a benchmark pipeline.
#' 
#' @param benchmark object of type \code{Benchmark}
#' @param concatenate logical value; if a list of expression matrices (per sample) is available, should they be concatenated into one matrix? Default value is \code{FALSE}
#'
#' @seealso 
#' 
#' * **\code{Benchmark}**: constructs a \code{Benchmark} object (benchmark pipeline set-up)
#'
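#' @examples
#' # A minimal usage sketch; `bm` is assumed to be a Benchmark object. With
#' # multiple input samples, a list of matrices is returned unless concatenated.
#' \dontrun{
#' exprs <- GetExpressionMatrix(bm, concatenate = TRUE)
#' dim(exprs)
#' colnames(exprs)  # column (marker) names stored with the benchmark inputs
#' }
#'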
#' @export
GetExpressionMatrix <- function(
  benchmark, concatenate = FALSE
) {
  obj <- .h5read(benchmark$h5_path, 'Input/ExpressionMatrix')
  if (is.list(obj)) {
    cn <- .h5read(benchmark$h5_path, 'Input/ColumnNames')
    for (idx in seq_along(obj))
      colnames(obj[[idx]]) <- cn
  } else {
    colnames(obj) <- .h5read(benchmark$h5_path, 'Input/ColumnNames')
  }
  if (concatenate && is.list(obj))
    obj <- do.call(rbind, obj)
  obj
}

#' Get \code{Benchmark} expression data \code{k}-NN matrix
#'
#' Extracts *k*-nearest-neighbours matrix associated with inputs to a benchmark pipeline.
#' 
#' @param benchmark object of type \code{Benchmark}
#'
#' @seealso 
#' 
#' * **\code{Benchmark}**: constructs a \code{Benchmark} object (benchmark pipeline set-up)
#'
#' @export
GetkNNMatrix <- function(
  benchmark
) {
  .h5readkNNMatrix(benchmark)
}

#' Get \code{Benchmark} expression data distance matrix
#'
#' Extracts distance matrix associated with inputs to a benchmark pipeline.
#' 
#' @param benchmark object of type \code{Benchmark}
#'
#' @seealso 
#' 
#' * **\code{Benchmark}**: constructs a \code{Benchmark} object (benchmark pipeline set-up)
#'
#' @export
GetDistanceMatrix <- function(
  benchmark
) {
  .h5readDistanceMatrix(benchmark)
}

#' Get \code{Benchmark} manual annotation
#'
#' Extracts manual annotation vector (or list of vectors) associated with inputs to a benchmark pipeline.
#' 
#' @param benchmark object of type \code{Benchmark}
#' @param concatenate logical value; if a list of annotation vectors (per sample) is available, should it be concatenated into one vector? Default value is \code{FALSE}
#'
#' @seealso
#' 
#' * **\code{Benchmark}**: constructs a \code{Benchmark} object (benchmark pipeline set-up)
#'
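#' @examples
#' # A minimal usage sketch; `bm` is assumed to be a Benchmark object with
#' # manual annotation attached to its inputs.
#' \dontrun{
#' annot <- GetAnnotation(bm, concatenate = TRUE)
#' table(annot)  # cell counts per annotated population
#' }
#'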
#' @export
GetAnnotation <- function(
  benchmark, concatenate = FALSE
) {
  .h5readFactorVectorOrListOfThem(benchmark$h5_path, 'Input/Annotation', concatenate = concatenate)
}

#' Get \code{Benchmark} sub-pipeline name
#'
#' Extracts name of a sub-pipeline in a \code{Benchmark} object (benchmark pipeline set-up).
#' 
#' @param benchmark object of type \code{Benchmark}
#' @param idx.subpipeline integer: index of a sub-pipeline of \code{benchmark}
#'
#' @seealso
#' 
#' * **\code{Benchmark}**: constructs a \code{Benchmark} object (benchmark pipeline set-up)
#'
#' @export
GetSubpipelineName <- function(
  benchmark, idx.subpipeline
) {
  GetSubpipelineTags(benchmark$subpipelines, idx.subpipeline)$subpipeline
}

#' Get \code{Benchmark} latent-space projection
#'
#' Extracts coordinate matrix of a latent-space projection generated by evaluating an object of type \code{Benchmark}.
#' 
#' @param benchmark object of type \code{Benchmark}
#' @param idx.subpipeline integer: index of a sub-pipeline of \code{benchmark}
#' @param idx.n_param integer: index of an *n*-parameter iteration of a sub-pipeline of \code{benchmark}
#'
#' @seealso
#' 
#' * **\code{Benchmark}**: constructs a \code{Benchmark} object (benchmark pipeline set-up)
#' 
#' * **\code{Evaluate}**: runs all benchmark sub-pipelines and scores the performance of each tool
#'
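#' @examples
#' # A minimal usage sketch; `bm` is assumed to be an evaluated Benchmark object
#' # whose first sub-pipeline includes a dimension-reduction step.
#' \dontrun{
#' proj <- GetProjection(bm, idx.subpipeline = 1, idx.n_param = 1)
#' head(proj)  # coordinate matrix of the latent-space projection
#' }
#'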
#' @export
GetProjection <- function(
  benchmark, idx.subpipeline, idx.n_param = NULL
) {
  .h5readProjectionResult(benchmark, idx.subpipeline, idx.n_param)
}

#' Get \code{Benchmark} 2-dimensional layout of input data
#'
#' Extracts coordinate matrix of a 2-dimensional layout associated with the input data for a \code{Benchmark} object (benchmark pipeline set-up).
#' This layout can be created by applying \code{AddLayout} to a previously evaluated benchmark pipeline and serves visualisation purposes.
#' 
#' @param benchmark object of type \code{Benchmark}
#' @param concatenate logical value; if a list of layout matrices (per sample) is available, should they be concatenated into one matrix? Default value is \code{FALSE}
#'
#' @seealso
#' 
#' * **\code{Benchmark}**: constructs a \code{Benchmark} object (benchmark pipeline set-up)
#' 
#' * **\code{Evaluate}**: runs all benchmark sub-pipelines and scores the performance of each tool
#' 
#' * **\code{AddLayout}**: allows you to add a separate 2-dimensional layout of the input dataset or to use an existing projection (produced in the evaluation) as a visualisation layout.
#'
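#' @examples
#' # A minimal usage sketch; `bm` is assumed to be an evaluated Benchmark object
#' # to which a 2-dimensional layout was added via AddLayout.
#' \dontrun{
#' layout <- GetLayout(bm, concatenate = TRUE)
#' plot(layout[, 'Component1'], layout[, 'Component2'], pch = '.')
#' }
#'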
#' @export
GetLayout <- function(
  benchmark, concatenate = FALSE
) {
  if (!benchmark$layout_available) return(NULL)
  ref <- rhdf5::h5read(benchmark$h5_path, 'Input/Layout/IsReferenceTo')
  if (length(ref) == 2 && !any(is.na(ref)) && is.numeric(ref)) {
    obj <- GetProjection(benchmark, ref[1], ref[2])
  } else {
    obj <- rhdf5::h5read(benchmark$h5_path, 'Input/Layout/Coordinates')
  }
  if (is.list(obj)) {
    obj <- purrr::map(obj, function(x) { x <- x[, 1:2]; colnames(x) <- c('Component1', 'Component2'); x })
  } else {
    obj <- obj[, 1:2]
    colnames(obj) <- c('Component1', 'Component2')
  }
    
  if (is.list(obj) && concatenate)
    obj <- do.call(rbind, obj)
  
  obj
}

#' Get \code{Benchmark} bootstrap indices
#'
#' Extracts vector of indices (or list of vectors of indices) used for taking bootstraps of input expression data for clustering (if this is done).
#' 
#' @param benchmark object of type \code{Benchmark}
#'
#' @seealso
#' 
#' * **\code{Benchmark}**: constructs a \code{Benchmark} object (benchmark pipeline set-up)
#'
#' @export
GetBootstrapIndices <- function(
  benchmark
) {
  .h5read(benchmark$h5_path, 'Input/BootstrapIndices')
}

#' Get \code{Benchmark} clustering input
#'
#' Extracts the input to a clustering tool that is part of a benchmark sub-pipeline that was set up previously.
#' If a dimension-reduction tool precedes the clustering tool, the latent-space projection from that tool is retrieved.
#' Otherwise, the original expression data is retrieved.
#' 
#' @param benchmark object of type \code{Benchmark}
#' @param idx.subpipeline integer: index of a sub-pipeline of \code{benchmark}
#' @param idx.n_param integer: index of an *n*-parameter iteration of a sub-pipeline of \code{benchmark}
#' @param null_if_exprs logical: whether to return \code{NULL} instead of original expression data. Default value is \code{FALSE}
#'
#' @seealso
#' 
#' * **\code{Benchmark}**: constructs a \code{Benchmark} object (benchmark pipeline set-up)
#' 
#' * **\code{Evaluate}**: runs all benchmark sub-pipelines and scores the performance of each tool
#'
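#' @examples
#' # A minimal usage sketch; `bm` is assumed to be an evaluated Benchmark object.
#' # If sub-pipeline 1 starts with a dimension-reduction tool, its projection is
#' # returned; otherwise the original expression data is returned.
#' \dontrun{
#' input <- GetClusteringInput(bm, idx.subpipeline = 1, idx.n_param = 1)
#' str(input)
#' }
#'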
#' @export
GetClusteringInput <- function(
  benchmark, idx.subpipeline, idx.n_param = NULL, null_if_exprs = FALSE
) {
  n_npar <- GetNParameterIterationsCount(benchmark, idx.subpipeline)
  if (n_npar == 0)
    idx.n_param <- NULL
  .h5readClusteringInput(benchmark, idx.subpipeline, idx.n_param, null_if_exprs = null_if_exprs)
}

#' Get \code{Benchmark} clustering result
#'
#' Extracts the result of a clustering tool that is part of a benchmark sub-pipeline that was set up previously.
#' If stability analysis is enabled for the benchmark, either the results of all clustering runs or only the result of a single run on the full expression data can be returned.
#' 
#' If you choose to extract results of all runs of a clustering algorithm which clustered on bootstraps of the original data, the last slot in the list contains the single run on original data.
#' Furthermore, you can extract bootstrap indices using \code{GetBootstrapIndices}.
#' 
#' @param benchmark object of type \code{Benchmark}
#' @param idx.subpipeline integer: index of a sub-pipeline of \code{benchmark}
#' @param idx.n_param integer: index of an *n*-parameter iteration of a sub-pipeline of \code{benchmark}
#' @param all_runs logical: whether results of all runs should be returned (in a list). Default value is \code{FALSE}
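#' @param concatenate logical: if the clustering result is returned as a list of vectors, should they be concatenated into a single vector? Default value is \code{TRUE}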
#'
#' @seealso
#' 
#' * **\code{Benchmark}**: constructs a \code{Benchmark} object (benchmark pipeline set-up)
#' 
#' * **\code{Evaluate}**: runs all benchmark sub-pipelines and scores the performance of each tool
#'
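#' @examples
#' # A minimal usage sketch; `bm` is assumed to be an evaluated Benchmark object
#' # whose first sub-pipeline includes a clustering step.
#' \dontrun{
#' # Single clustering vector:
#' cl <- GetClustering(bm, idx.subpipeline = 1, idx.n_param = 1)
#' table(cl)
#'
#' # All runs (if stability analysis is enabled), plus the bootstrap indices:
#' runs <- GetClustering(bm, idx.subpipeline = 1, idx.n_param = 1,
#'                       all_runs = TRUE, concatenate = FALSE)
#' idcs <- GetBootstrapIndices(bm)
#' }
#'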
#' @export
GetClustering <- function(
  benchmark, idx.subpipeline, idx.n_param = NULL, all_runs = FALSE, concatenate = TRUE
) {
  obj <- .h5readClusteringResult(benchmark, idx.subpipeline, idx.n_param)$ClusteringVector
  if (!all_runs) {
    if (benchmark$stability == 'repeat')
      obj <- obj[[1]]
    else if (benchmark$stability == 'bootstrap')
      obj <- obj[[length(obj)]]
  }
  if (concatenate && is.list(obj))
    obj <- do.call(c, obj)
  obj
}

#' Get \code{Benchmark} *n*-parameter iteration name for a projection step
#'
#' Extracts name of an *n*-parameter iteration of the projection step of a sub-pipeline in a \code{Benchmark} object (benchmark pipeline set-up).
#' 
#' @param benchmark object of type \code{Benchmark}
#' @param idx.subpipeline integer: index of a subpipeline of \code{benchmark}
#' @param idx.n_param integer: index of an *n*-parameter iteration of a sub-pipeline of \code{benchmark}
#' @param with_tool_name logical: whether tool name, in addition to *n*-parameter value, should be included in the name. Default value is \code{TRUE}
#'
#' @seealso
#' 
#' * **\code{Benchmark}**: constructs a \code{Benchmark} object (benchmark pipeline set-up)
#'
#' @export
GetNParameterIterationName_Projection <- function(
  benchmark, idx.subpipeline, idx.n_param, with_tool_name = TRUE
) {
  
  proj <- benchmark$subpipelines[[idx.subpipeline]]$projection
  
  if (is.null(proj))
    return(NULL)
  
  if (IsClone(proj)) {
    idx.subpipeline <- proj$ref
    proj <- benchmark$subpipelines[[idx.subpipeline]]$projection
  }
  
  tool_name <- proj$name
  
  if (is.null(idx.n_param) || idx.n_param == 'NoNParameter')
    return(tool_name)
  
  n_param_name <- proj$modules[[proj$which_n_param]]$n_param
  n_param_val <- benchmark$n_params[[idx.subpipeline]]$projection[idx.n_param]
  
  string_n_param <- paste0(n_param_name, '=', n_param_val)
  
  if (with_tool_name)
    return(paste0(tool_name, ' [', string_n_param, ']'))
  else
    return(string_n_param)
}

GetProjectionModuleCount <- function(benchmark, idx.subpipeline) {
  if (!is.null(benchmark$subpipelines[[idx.subpipeline]]$projection)) {
    proj <- benchmark$subpipelines[[idx.subpipeline]]$projection
    if (IsClone(proj)) {
      proj <- benchmark$subpipelines[[proj$ref]]$projection
    }
    return(proj$n_modules)
  } else {
    return(NULL)
  }
}

GetClusteringModuleCount <- function(benchmark, idx.subpipeline) {
  if (!is.null(benchmark$subpipelines[[idx.subpipeline]]$clustering))
    benchmark$subpipelines[[idx.subpipeline]]$clustering$n_modules
  else
    NULL
}

#' Get \code{Benchmark} *n*-parameter iteration name for a clustering step
#'
#' Extracts name of an *n*-parameter iteration of the clustering part of a subpipeline in a \code{Benchmark} object (benchmark pipeline set-up).
#' 
#' @param benchmark object of type \code{Benchmark}
#' @param idx.subpipeline integer: index of a subpipeline of \code{benchmark}
#' @param idx.n_param integer: index of an *n*-parameter iteration of a sub-pipeline of \code{benchmark}
#' @param with_tool_name logical: whether tool name, in addition to *n*-parameter value, should be included in the name. Default value is \code{TRUE}
#'
#' @seealso
#' 
#' * **\code{Benchmark}**: constructs a \code{Benchmark} object (benchmark pipeline set-up)
#'
#' @export
GetNParameterIterationName_Clustering <- function(
  benchmark, idx.subpipeline, idx.n_param, with_tool_name = TRUE
) {
  
  clus <- benchmark$subpipelines[[idx.subpipeline]]$clustering
  
  if (is.null(clus))
    return(NULL)
  
  tool_name <- clus$name
  
  if (is.null(idx.n_param) || is.null(clus$which_n_param))
    return(tool_name)
  
  n_param_name <- clus$modules[[clus$which_n_param]]$n_param
  n_param_val <- benchmark$n_params[[idx.subpipeline]]$clustering[idx.n_param]
  
  string_n_param <- paste0(n_param_name, '=', n_param_val)
  
  if (with_tool_name)
    return(paste0(tool_name, ' [', string_n_param, ']'))
  else
    return(string_n_param)
}

#' Get \code{Benchmark} *n*-parameter values for a subpipeline
#'
#' Extracts combinations of *n*-parameter values for each *n*-parameter iteration of a subpipeline in a \code{Benchmark} object (benchmark pipeline set-up).
#' 
#' @param benchmark object of type \code{Benchmark}
#' @param idx.subpipeline integer: index of a subpipeline of \code{benchmark}
#'
#' @seealso
#' 
#' * **\code{Benchmark}**: constructs a \code{Benchmark} object (benchmark pipeline set-up)
#'
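#' @examples
#' # A minimal usage sketch; `bm` is assumed to be a Benchmark object whose first
#' # sub-pipeline iterates over at least one n-parameter.
#' \dontrun{
#' GetNParameterValues(bm, idx.subpipeline = 1)
#' # -> data frame with columns idx.n_param, npar_proj and npar_clus
#' }
#'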
#' @export
GetNParameterValues <- function(
  benchmark,
  idx.subpipeline
) {
  
  npar_proj <-
    if (IsClone(benchmark$subpipelines[[idx.subpipeline]]$projection))
      benchmark$n_params[[benchmark$subpipelines[[idx.subpipeline]]$projection$ref]]$projection
    else
      benchmark$n_params[[idx.subpipeline]]$projection
  npar_clus <- benchmark$n_params[[idx.subpipeline]]$clustering
  
  n <- max(length(npar_proj), length(npar_clus))
  if (is.null(npar_proj)) npar_proj <- rep(NA, n)
  if (is.null(npar_clus)) npar_clus <- rep(NA, n)
  
  data.frame(
    'idx.n_param' = seq_len(n),
    'npar_proj' = npar_proj,
    'npar_clus' = npar_clus
  )
}

#' Get \code{Benchmark} *n*-parameter iteration names of a subpipeline
#'
#' Extracts names of all *n*-parameter iterations of a subpipeline in a \code{Benchmark} object (benchmark pipeline set-up).
#' 
#' @param benchmark object of type \code{Benchmark}
#' @param idx.subpipeline integer: index of a subpipeline of \code{benchmark}
#' @param with_tool_names logical: whether names of the modules should be included. Default value is \code{FALSE}
#'
#' @seealso
#' 
#' * **\code{Benchmark}**: constructs a \code{Benchmark} object (benchmark pipeline set-up)
#'
#' @export
GetNParameterNames <- function(
  benchmark,
  idx.subpipeline,
  with_tool_names = FALSE
) {
  
  proj <- benchmark$subpipelines[[idx.subpipeline]]$projection
  clus <- benchmark$subpipelines[[idx.subpipeline]]$clustering
  
  if (IsClone(proj))
    proj <- benchmark$subpipelines[[proj$ref]]$projection
  
  proj_n_param <- if (!is.null(proj) && !is.null(proj$which_n_param)) proj$modules[[proj$which_n_param]]$n_param else NULL
  proj_idx     <- if (!is.null(proj) && !is.null(proj$which_n_param)) proj$which_n_param else NULL
  proj_tool    <- if (!is.null(proj) && !is.null(proj$which_n_param)) proj$modules[[proj$which_n_param]]$name else NULL
  
  
  clus_n_param <- if (!is.null(clus) && !is.null(clus$which_n_param)) clus$modules[[clus$which_n_param]]$n_param else NULL
  clus_idx     <- if (!is.null(clus) && !is.null(clus$which_n_param)) clus$which_n_param else NULL
  clus_tool    <- if (!is.null(clus) && !is.null(clus$which_n_param)) clus$modules[[clus$which_n_param]]$name else NULL
  
  list(
    projection = if (is.null(proj_n_param)) { NULL } else { if (with_tool_names) paste0(proj_tool, ' (module ', proj_idx, ') ', proj_n_param) else proj_n_param },
    clustering = if (is.null(clus_n_param)) { NULL } else { if (with_tool_names) paste0(clus_tool, ' (module ', clus_idx, ') ', clus_n_param) else clus_n_param }
  )
}

#' Get \code{Benchmark} *n*-parameter iteration name
#'
#' Extracts name of an *n*-parameter iteration of a subpipeline in a \code{Benchmark} object (benchmark pipeline set-up).
#' 
#' @param benchmark object of type \code{Benchmark}
#' @param idx.subpipeline integer: index of a subpipeline of \code{benchmark}
#' @param idx.n_param integer: index of an *n*-parameter iteration of a sub-pipeline of \code{benchmark}
#' @param with_tool_names logical: whether tool name(s), in addition to *n*-parameter value(s), should be included in the name. Default value is \code{TRUE}
#'
#' @seealso
#' 
#' * **\code{Benchmark}**: constructs a \code{Benchmark} object (benchmark pipeline set-up)
#'
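#' @examples
#' # A minimal usage sketch; `bm` is assumed to be a Benchmark object. The tool
#' # and parameter names in the output shown here are hypothetical.
#' \dontrun{
#' GetNParameterIterationName(bm, idx.subpipeline = 1, idx.n_param = 2)
#' # e.g. "tSNE [perplexity=50] -> FlowSOM [grid_width=10]"
#' }
#'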
#' @export
GetNParameterIterationName <- function(
  benchmark, idx.subpipeline, idx.n_param, with_tool_names = TRUE
) {
  if (is.null(idx.n_param))
    return(GetSubpipelineName(benchmark, idx.subpipeline))
  name_proj <- GetNParameterIterationName_Projection(benchmark, idx.subpipeline, idx.n_param, with_tool_names)
  name_clus <- GetNParameterIterationName_Clustering(benchmark, idx.subpipeline, idx.n_param, with_tool_names)
  
  if (is.null(name_proj) && is.null(name_clus))
    NULL
  else if (is.null(name_proj))
    name_clus
  else if (is.null(name_clus))
    name_proj
  else
    paste0(name_proj, ' -> ', name_clus)
}

#' Get \code{Benchmark} *n*-parameter count for subpipeline
#'
#' Extracts the number of *n*-parameter iterations for a chosen subpipeline in a \code{Benchmark} object (benchmark pipeline set-up).
#' 
#' @param benchmark object of type \code{Benchmark}
#' @param idx.subpipeline integer: index of a sub-pipeline of \code{benchmark}
#'
#' @seealso
#' 
#' * **\code{Benchmark}**: constructs a \code{Benchmark} object (benchmark pipeline set-up)
#'
#' @export
GetNParameterIterationsCount <- function(
  benchmark, idx.subpipeline
) {
  if (!is.null(benchmark$n_params[[idx.subpipeline]]$clustering))
    length(benchmark$n_params[[idx.subpipeline]]$clustering)
  else if (!is.null(benchmark$n_params[[idx.subpipeline]]$projection))
    length(benchmark$n_params[[idx.subpipeline]]$projection)
  else if (IsClone(benchmark$subpipelines[[idx.subpipeline]]$projection)) {
    length(benchmark$n_params[[benchmark$subpipelines[[idx.subpipeline]]$projection$ref]]$projection)
  } else
    0
}

#' Get \code{Benchmark} dimension-reduction evaluation scores
#'
#' Extracts a list of evaluation scores and other products of evaluating the performance of the projection step, for each requested *n*-parameter iteration of a sub-pipeline that includes dimension reduction.
#' 
#' @param benchmark object of type \code{Benchmark}
#' @param idx.subpipeline integer: index of a subpipeline of \code{benchmark}
#' @param idcs.n_param integer or vector of integers: indices of *n*-parameter iterations of a sub-pipeline of \code{benchmark}. Default value is \code{NULL}, which selects all iterations
#'
#' @seealso
#' 
#' * **\code{Benchmark}**: constructs a \code{Benchmark} object (benchmark pipeline set-up)
#'
#' * **\code{Evaluate}**: runs all benchmark sub-pipelines and scores the performance of each tool
#'
#' @export
GetProjectionScores <- function(
  benchmark, idx.subpipeline, idcs.n_param = NULL
) {
  
  if (is.null(idcs.n_param))
    idcs.n_param <- seq_len(GetNParameterIterationsCount(benchmark, idx.subpipeline))
  
  no_npar <- FALSE
  if (length(idcs.n_param) == 0) {
    no_npar <- TRUE
    idcs.n_param <- 1
  }
  
  proj_scores <- NULL
  if (!is.null(benchmark$subpipelines[[idx.subpipeline]]$projection)) {
    proj_scores <- purrr::map(
      idcs.n_param,
      function(idx.n_param) .h5readProjectionScoring(benchmark, idx.subpipeline, if (no_npar) NULL else idx.n_param)
    )
    names(proj_scores) <- purrr::map_chr(
      idcs.n_param,
      function(idx.n_param) GetNParameterIterationName_Projection(benchmark, idx.subpipeline, if (no_npar) NULL else idx.n_param)
    )
  }
  proj_scores
}

#' Get \code{Benchmark} clustering evaluation scores
#'
#' Extracts a list of evaluation scores and other products of evaluating the performance of a clustering tool used in a sub-pipeline of a \code{Benchmark} object (benchmark pipeline set-up), for each requested *n*-parameter iteration.
#' 
#' @param benchmark object of type \code{Benchmark}
#' @param idx.subpipeline integer: index of a subpipeline of \code{benchmark}
#' @param idcs.n_param integer or vector of integers: indices of *n*-parameter iterations of a sub-pipeline of \code{benchmark}. Default value is \code{NULL}, which selects all iterations
#'
#' @seealso
#' 
#' * **\code{Benchmark}**: constructs a \code{Benchmark} object (benchmark pipeline set-up)
#'
#' * **\code{Evaluate}**: runs all benchmark sub-pipelines and scores the performance of each tool
#'
#' @export
GetClusteringScores <- function(
  benchmark, idx.subpipeline, idcs.n_param = NULL
) {
  if (is.null(idcs.n_param))
    idcs.n_param <- seq_len(GetNParameterIterationsCount(benchmark, idx.subpipeline))
  
  no_npar <- FALSE
  if (length(idcs.n_param) == 0) {
    no_npar <- TRUE
    idcs.n_param <- 1
  }
  
  clus_scores <- NULL
  if (!is.null(benchmark$subpipelines[[idx.subpipeline]]$clustering)) {
    clus_scores <- purrr::map(
      idcs.n_param,
      function(idx.n_param) .h5readClusteringScoring(benchmark, idx.subpipeline, if (no_npar) NULL else idx.n_param)
    )
    names(clus_scores) <- purrr::map_chr(
      idcs.n_param,
      function(idx.n_param) GetNParameterIterationName_Clustering(benchmark, idx.subpipeline, if (no_npar) NULL else idx.n_param)
    )
  }
  clus_scores
}

#' Get \code{Benchmark} evaluation scores of an *n*-parameter iteration
#'
#' Extracts a list of evaluation scores and other products of evaluating the performance of the tools used in the requested *n*-parameter iterations of a sub-pipeline in a \code{Benchmark} object (benchmark pipeline set-up).
#' 
#' @param benchmark object of type \code{Benchmark}
#' @param idx.subpipeline integer: index of a sub-pipeline of \code{benchmark}
#' @param idcs.n_param integer or vector of integers: indices of *n*-parameter iterations of a subpipeline of \code{benchmark}
#'
#' @seealso
#' 
#' * **\code{Benchmark}**: constructs a \code{Benchmark} object (benchmark pipeline set-up)
#'
#' * **\code{Evaluate}**: runs all benchmark sub-pipelines and scores the performance of each tool
#'
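#' @examples
#' # A minimal usage sketch; `bm` is assumed to be an evaluated Benchmark object.
#' \dontrun{
#' s <- GetScores(bm, idx.subpipeline = 1)
#' names(s)             # "Projection" and "Clustering"
#' names(s$Clustering)  # one entry per n-parameter iteration
#' }
#'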
#' @export
GetScores <- function(
  benchmark, idx.subpipeline, idcs.n_param = NULL
) {
  if (is.null(idcs.n_param))
    idcs.n_param <- seq_len(GetNParameterIterationsCount(benchmark, idx.subpipeline))
    
  no_npar <- FALSE
  if (length(idcs.n_param) == 0) {
    no_npar <- TRUE
    idcs.n_param <- 1
  }
  
  proj_scores <- NULL
  if (!is.null(benchmark$subpipelines[[idx.subpipeline]]$projection) && benchmark$score_projections) {
    proj_scores <- purrr::map(
      idcs.n_param,
      function(idx.n_param) .h5readProjectionScoring(benchmark, idx.subpipeline, if (no_npar) NULL else idx.n_param)
    )
    names(proj_scores) <- purrr::map_chr(
      idcs.n_param,
      function(idx.n_param) GetNParameterIterationName_Projection(benchmark, idx.subpipeline, if (no_npar) NULL else idx.n_param)
    )
  }
  clus_scores <- NULL
  if (!is.null(benchmark$subpipelines[[idx.subpipeline]]$clustering)) {
    clus_scores <- purrr::map(
      idcs.n_param,
      function(idx.n_param) .h5readClusteringScoring(benchmark, idx.subpipeline, if (no_npar) NULL else idx.n_param)
    )
    names(clus_scores) <- purrr::map_chr(
      idcs.n_param,
      function(idx.n_param) GetNParameterIterationName_Clustering(benchmark, idx.subpipeline, if (no_npar) NULL else idx.n_param)
    )
  }
  list(
    Projection = proj_scores,
    Clustering = clus_scores
  )
}

#' Get \code{Benchmark} projection evaluation scores table
#'
#' Returns a \code{tibble} of projection evaluation metric values for specified subpipeline of a \code{Benchmark} object (benchmark pipeline set-up).
#' 
#' @param benchmark object of type \code{Benchmark}
#' @param idx.subpipeline integer: index of a subpipeline of \code{benchmark} that includes a projection step
#'
#' @seealso
#' 
#' * **\code{Benchmark}**: constructs a \code{Benchmark} object (benchmark pipeline set-up)
#'
#' * **\code{Evaluate}**: runs all benchmark subpipelines and scores the performance of each tool
#'
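#' @examples
#' # A minimal usage sketch; `bm` is assumed to be an evaluated Benchmark object
#' # whose first sub-pipeline includes a projection step.
#' \dontrun{
#' tab <- GetProjectionScoringTable(bm, idx.subpipeline = 1)
#' head(tab)  # long-format tibble with 'Evaluation Metric' and 'Value' columns
#' }
#'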
#' @export
GetProjectionScoringTable <- function(
  benchmark,
  idx.subpipeline
) {
  scores <- GetProjectionScores(benchmark, idx.subpipeline)
  
  n_param_names  <- GetNParameterNames(benchmark, idx.subpipeline)
  n_param_values <- GetNParameterValues(benchmark, idx.subpipeline)
  
  lcmc <- purrr::map(scores, function(x) x$`Local Continuity Meta-Criterion`)
  Bnx <- purrr::map(scores, function(x) x$`Relative Intrusiveness`)
  eT <- purrr::map(scores, function(x) x$`Trustworthiness`)
  eC <- purrr::map(scores, function(x) x$`Continuity`)
  
  names(lcmc) <- names(Bnx) <- names(eT) <- names(eC) <- names(scores)
  
  full <- any(!is.na(unlist(eT)))
  
  if (full) {
    metrics_names <- c(
      'Local Continuity Meta-Criterion',
      'Relative Intrusiveness',
      'Trustworthiness',
      'Continuity'
    )
  } else {
    metrics_names <- c(
      'Local Continuity Meta-Criterion',
      'Relative Intrusiveness'
    )
  }
  
  if (nrow(n_param_values) == 0) {
    vals <- if (full) unlist(c(lcmc, Bnx, eT, eC)) else unlist(c(lcmc, Bnx))
    vals <- dplyr::tibble(
      'Evaluation Metric' = metrics_names,
      'Value' = vals
    )
    return(vals)
  } else {
    vals_by_npar <-
      if (full)
        dplyr::tibble(
          'npar_proj' = n_param_values$npar_proj,
          unlist(lcmc),
          unlist(Bnx),
          unlist(eT),
          unlist(eC)
        )
      else
        dplyr::tibble(
          'npar_proj' = n_param_values$npar_proj,
          unlist(lcmc),
          unlist(Bnx)
        )
    vals_by_npar <- vals_by_npar[, apply(vals_by_npar, 2, function(x) any(!is.na(x)))]
    colnames(vals_by_npar) <- c(n_param_names$projection, metrics_names)
    vals_by_npar <- tidyr::pivot_longer(vals_by_npar, cols = metrics_names, names_repair = 'minimal')
    colnames(vals_by_npar)[(ncol(vals_by_npar) - 1):ncol(vals_by_npar)] <- c('Evaluation Metric', 'Value')
    vals_by_npar$`Evaluation Metric` <- as.factor(vals_by_npar$`Evaluation Metric`)
    return(vals_by_npar)
  }
}

#' Get \code{Benchmark} clustering evaluation scores table
#'
#' Returns a \code{tibble} of clustering evaluation metric values for specified subpipeline of a \code{Benchmark} object (benchmark pipeline set-up).
#' 
#' @param benchmark object of type \code{Benchmark}
#' @param idx.subpipeline integer: index of a sub-pipeline of \code{benchmark} that includes a clustering step
#'
#' @seealso
#' 
#' * **\code{Benchmark}**: constructs a \code{Benchmark} object (benchmark pipeline set-up)
#'
#' * **\code{Evaluate}**: runs all benchmark sub-pipelines and scores the performance of each tool
#'
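#' @examples
#' # A minimal usage sketch; `bm` is assumed to be an evaluated Benchmark object
#' # whose first sub-pipeline includes a clustering step.
#' \dontrun{
#' tab <- GetClusteringScoringTable(bm, idx.subpipeline = 1)
#' head(tab)  # one row per evaluation metric (and per n-parameter iteration)
#' }
#'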
#' @export
GetClusteringScoringTable <- function(
  benchmark,
  idx.subpipeline
) {
  scores <- GetClusteringScores(benchmark, idx.subpipeline)
  
  n_param_names  <- GetNParameterNames(benchmark, idx.subpipeline)
  n_param_values <- GetNParameterValues(benchmark, idx.subpipeline)
  
  db           <- purrr::map(scores, function(x) unlist(x$`Davies-Bouldin Index`))
  ari          <- purrr::map(scores, function(x) unlist(x$`Adjusted Rand Index`))
  #nmi          <- purrr::map(scores, function(x) unlist(x$`Normalised Mutual Information`))
  f1_bij       <- purrr::map(scores, function(x) unlist(x$`Mean F1 Across Matches (Bijective)`))
  f1_fixed_cl  <- purrr::map(scores, function(x) unlist(x$`Mean F1 Across Matches (Relaxed, Fixed Cluster)`))
  f1_fixed_lab <- purrr::map(scores, function(x) unlist(x$`Mean F1 Across Matches (Relaxed, Fixed Label)`))
  pr_bij       <- purrr::map(scores, function(x) unlist(x$`Mean Precision Across Matches (Bijective)`))
  pr_fixed_cl  <- purrr::map(scores, function(x) unlist(x$`Mean Precision Across Matches (Relaxed, Fixed Cluster)`))
  pr_fixed_lab <- purrr::map(scores, function(x) unlist(x$`Mean Precision Across Matches (Relaxed, Fixed Label)`))
  re_bij       <- purrr::map(scores, function(x) unlist(x$`Mean Recall Across Matches (Bijective)`))
  re_fixed_cl  <- purrr::map(scores, function(x) unlist(x$`Mean Recall Across Matches (Relaxed, Fixed Cluster)`))
  re_fixed_lab <- purrr::map(scores, function(x) unlist(x$`Mean Recall Across Matches (Relaxed, Fixed Label)`))
  names(db) <- names(ari) <- #names(nmi) <- 
    names(f1_bij) <- names(f1_fixed_cl) <- names(f1_fixed_lab) <-
    names(pr_bij) <- names(pr_fixed_cl) <- names(pr_fixed_lab) <-
    names(re_bij) <- names(re_fixed_cl) <- names(re_fixed_lab) <- names(scores)
  metrics_names <- c(
    'Davies-Bouldin Index', 'Adjusted Rand Index', #'Normalised Mutual Information',
    'Mean F1 Bijective', 'Mean F1 Fixed-Cluster', 'Mean F1 Fixed-Label',
    'Mean Precision Bijective', 'Mean Precision Fixed-Cluster', 'Mean Precision Fixed-Label',
    'Mean Recall Bijective', 'Mean Recall Fixed-Cluster', 'Mean Recall Fixed-Label'
  )
  multiple_runs <- length(f1_bij[[1]]) > 1
  if (nrow(n_param_values) == 0) {
    vals        <- c(db, ari, #nmi,
                     f1_bij, f1_fixed_cl, f1_fixed_lab, pr_bij, pr_fixed_cl, pr_fixed_lab, re_bij, re_fixed_cl, re_fixed_lab)
    if (!multiple_runs) {
      vals <- dplyr::tibble(
        'Evaluation Metric' = metrics_names,
        'Value' = unlist(vals)
      )
    } else {
      vals <- dplyr::tibble(
        'Evaluation Metric' = metrics_names,
        'Mean Value' = sapply(vals, mean),
        'Standard Deviation' = sapply(vals, sd)
      )
    }
    return(vals)
  } else {
    if (!multiple_runs) {
      vals_by_npar <- dplyr::tibble(
        'npar_proj' = n_param_values$npar_proj,
        'npar_clus' = n_param_values$npar_clus,
        unlist(db), unlist(ari), #unlist(nmi),
        unlist(f1_bij), unlist(f1_fixed_cl), unlist(f1_fixed_lab), unlist(pr_bij), unlist(pr_fixed_cl), unlist(pr_fixed_lab), unlist(re_bij), unlist(re_fixed_cl), unlist(re_fixed_lab)
      )
      vals_by_npar <- vals_by_npar[, apply(vals_by_npar, 2, function(x) any(!is.na(x)))]
      colnames(vals_by_npar) <- c(n_param_names$projection, n_param_names$clustering, metrics_names)
      vals_by_npar <- tidyr::pivot_longer(vals_by_npar, cols = metrics_names, names_repair = 'minimal')
      colnames(vals_by_npar)[(ncol(vals_by_npar) - 1):ncol(vals_by_npar)] <- c('Evaluation Metric', 'Value')
    } else {
      means_by_npar <- dplyr::tibble(
        'npar_proj' = n_param_values$npar_proj,
        'npar_clus' = n_param_values$npar_clus,
        sapply(db, mean), sapply(ari, mean),
        #sapply(nmi, mean),
        sapply(f1_bij, mean), sapply(f1_fixed_cl, mean), sapply(f1_fixed_lab, mean), sapply(pr_bij, mean), sapply(pr_fixed_cl, mean), sapply(pr_fixed_lab, mean), sapply(re_bij, mean), sapply(re_fixed_cl, mean), sapply(re_fixed_lab, mean)
      )
      sd_by_npar <- dplyr::tibble(
        'npar_proj' = n_param_values$npar_proj,
        'npar_clus' = n_param_values$npar_clus,
        sapply(db, sd), sapply(ari, sd), #sapply(nmi, mean),
        sapply(f1_bij, sd), sapply(f1_fixed_cl, sd), sapply(f1_fixed_lab, sd), sapply(pr_bij, sd), sapply(pr_fixed_cl, sd), sapply(pr_fixed_lab, sd), sapply(re_bij, sd), sapply(re_fixed_cl, sd), sapply(re_fixed_lab, sd)
      )
      
      npar_proj <- n_param_names$projection
      if (is.null(npar_proj))
        npar_proj <- 'npar_proj'
      npar_clus <- n_param_names$clustering
      if (is.null(npar_clus))
        npar_clus <- 'npar_clus'
      
      colnames(means_by_npar) <- colnames(sd_by_npar) <- c(npar_proj, npar_clus, metrics_names)
      means_by_npar <- tidyr::pivot_longer(means_by_npar, cols = metrics_names)
      sd_by_npar <- tidyr::pivot_longer(sd_by_npar, cols = metrics_names)
      
      vals_by_npar <- dplyr::tibble(
        'npar_proj' = means_by_npar[[npar_proj]],
        'npar_clus' = means_by_npar[[npar_clus]],
        'Evaluation Metric' = means_by_npar$name,
        'Mean Value' = means_by_npar$value,
        'Standard Deviation' = sd_by_npar$value
      )
      colnames(vals_by_npar)[1:2] <- c(npar_proj, npar_clus)
      idcs_keep <- apply(vals_by_npar, 2, function(x) !all(is.na(x)))
      vals_by_npar <- vals_by_npar[idcs_keep]
    }
    vals_by_npar$`Evaluation Metric` <- as.factor(vals_by_npar$`Evaluation Metric`)
    return(vals_by_npar)
  }
}

#' Get RMSDs of each cluster in clustering result of an evaluated \code{Benchmark} pipeline
#'
#' Returns the vector of RMSDs per cluster.
#' 
#' @param benchmark object of type \code{Benchmark}
#' @param idx.subpipeline integer: index of a sub-pipeline of \code{benchmark} that includes a clustering step
#' @param idx.n_param integer: index of *n*-parameter iteration of a subpipeline of \code{benchmark} (if *n*-parameters specified). Default value is \code{NULL}
#' @param idx.run optional integer: index of a clustering run if repeated runs were evaluated. Default value is \code{NULL}
#'
#' @seealso
#' 
#' * **\code{Benchmark}**: constructs a \code{Benchmark} object (benchmark pipeline set-up)
#'
#' * **\code{Evaluate}**: runs all benchmark sub-pipelines and scores the performance of each tool
#'
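#' @examples
#' # A minimal usage sketch; `bm` is assumed to be an evaluated Benchmark object
#' # whose first sub-pipeline includes a clustering step.
#' \dontrun{
#' rmsd <- GetRMSDPerCluster(bm, idx.subpipeline = 1, idx.n_param = 1)
#' sort(unlist(rmsd), decreasing = TRUE)  # largest per-cluster RMSD first
#' }
#'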
#' @export
GetRMSDPerCluster <- function(
  benchmark,
  idx.subpipeline,
  idx.n_param = NULL,
  idx.run = NULL
) {
  cl <- GetClustering(benchmark, idx.subpipeline, idx.n_param, all_runs = TRUE)
  exprs <- GetExpressionMatrix(benchmark, concatenate = TRUE)
  if (benchmark$stability == 'repeat' && !is.null(idx.run)) {
    cl <- cl[[idx.run]]
    return(rmsd_per_cluster(exprs, cl))
  } else if (benchmark$stability %in% c('single', 'repeat')) {
    return(rmsd_per_cluster(exprs, cl))
  } else { # return results for multiple runs
    res <- purrr::map(cl, function(x) rmsd_per_cluster(exprs, x))
    names(res) <- paste0('Run', seq_along(res))
    return(res)
  }
}

GetLabelClusterMatching <- function(
  benchmark,
  idx.subpipeline,
  idx.n_param = NULL,
  idx.run = NULL
) {
  s <- GetClusteringScores(benchmark, idx.subpipeline, idx.n_param)
  bij <- s[[1]]$`Label-Cluster Matching (Bijective)`
  fc <- s[[1]]$`Label-Cluster Matching (Relaxed, Fixed Cluster)`
  fl <- s[[1]]$`Label-Cluster Matching (Relaxed, Fixed Label)`
  if (benchmark$stability == 'repeat' && !is.null(idx.run)) {
    bij <- bij[bij$Run == idx.run, -1]
    fc <- fc[fc$Run == idx.run, -1]
    fl <- fl[fl$Run == idx.run, -1]
  }
  if (ncol(bij) == 2) {
    bij <- data.frame(bij); colnames(bij) <- c('Population', 'Cluster')
    fc <- data.frame(fc); colnames(fc) <- c('Cluster', 'Population')
    fl <- data.frame(fl); colnames(fl) <- c('Population', 'Cluster')
  }
  list(
    'Bijective' = bij,
    'Fixed Cluster' = fc,
    'Fixed Label' = fl
  )
}

GetRMSDPerPopulation <- function(
  benchmark,
  idx.subpipeline,
  idx.n_param = NULL,
  idx.run = NULL,
  match_type = 'bijective'
) {
  rmsd <- GetRMSDPerCluster(benchmark, idx.subpipeline, idx.n_param, idx.run)
  matching <- GetLabelClusterMatching(benchmark, idx.subpipeline, idx.n_param, idx.run)
  
  match_type <- match.arg(match_type, choices = c('bijective', 'fixed_cluster', 'fixed_label', 'real'))
  if (match_type == 'bijective') {
    matching <- matching$Bijective
  } else if (match_type == 'fixed_cluster') {
    matching <- matching$`Fixed Cluster`
    colnames(matching) <- c('Cluster', 'Population')
  } else if (match_type == 'fixed_label') {
    matching <- matching$`Fixed Label`
  } else if (match_type == 'real') {
    annot <- GetAnnotation(benchmark, concatenate = TRUE)
    res <- rmsd_per_cluster(GetExpressionMatrix(benchmark, concatenate = TRUE), annot)
    names(res) <- levels(annot)
    if (!is.null(benchmark$unassigned_labels)) {
      res <- res[!names(res) %in% benchmark$unassigned_labels]
    }
    return(as.list(res))
  }
    
  lab_u <- levels(GetAnnotation(benchmark, concatenate = TRUE))
  lab_u <- lab_u[!lab_u %in% benchmark$unassigned_labels]
  if (benchmark$stability == 'single' || !is.null(idx.run)) {
    res <- purrr::map(lab_u, function(pop) {
      idx_clusters <- which(purrr::map_lgl(matching$Population, function(x) isTRUE(x == pop)))
      if (length(idx_clusters) == 0) {
        r <- NA
        names(r) <- 'NA'
        return(r)
      }
      r <- rmsd[as.numeric(matching$Cluster[idx_clusters])]
      names(r) <- idx_clusters
      r
    })
    names(res) <- lab_u
  } else {
    res <- vector(mode = 'list', length = max(matching$Run))
    names(res) <- paste0('Run', seq_along(res))
    for (idx.run in seq_along(res)) {
      m <- matching[matching$Run == idx.run, ]
      res[[idx.run]] <- purrr::map(lab_u, function(pop) {
        idx_clusters <- which(purrr::map_lgl(m$Population, function(x) isTRUE(x == pop)))
        if (length(idx_clusters) == 0) {
          r <- NA
          names(r) <- 'NA'
          return(r)
        }
        r <- rmsd[[idx.run]][as.numeric(m$Cluster[idx_clusters])]
        names(r) <- idx_clusters
        r
      })
      names(res[[idx.run]]) <- lab_u
    }
  }
  res
}

GetPopulationSizes <- function(
  benchmark, as_list = TRUE, include_unassigned = FALSE
) {
  annot <- GetAnnotation(benchmark, concatenate = TRUE)
  if (as_list) {
    res <- as.list(table(annot))
    if (!include_unassigned) {
      res <- res[!names(res) %in% benchmark$unassigned_labels]
    }
  } else {
    res <- as.data.frame(table(annot))
    colnames(res) <- c('Population', 'Size')
    if (!include_unassigned) {
      res <- res[!res$Population %in% benchmark$unassigned_labels, ]
    }
  }
  res
}

GetClusterSizes <- function(
  benchmark,
  idx.subpipeline,
  idx.n_param = NULL,
  idx.run = NULL,
  as_list = TRUE
) {
  cl <- GetClustering(benchmark, idx.subpipeline, idx.n_param, all_runs = TRUE)
  
  if (benchmark$stability == 'repeat' && !is.null(idx.run)) {
    cl <- cl[[idx.run]]
  }
  
  if (is.list(cl)) {
    res <- vector(mode = 'list', length = length(cl))
    names(res) <- paste0('Run', seq_along(res))
    for (idx.run in seq_along(res)) {
      if (as_list) {
        res[[idx.run]] <- as.list(table(cl[[idx.run]]))
      } else {
        res[[idx.run]] <- as.data.frame(table(cl[[idx.run]]))
        colnames(res[[idx.run]]) <- c('Cluster', 'Size')
      }
    }
  } else {
    if (as_list) {
      res <- as.list(table(cl))
    } else {
      res <- as.data.frame(table(cl))
      colnames(res) <- c('Cluster', 'Size')
    }
  }
  
  res
}

GetMatchedClusterSizes <- function(
  benchmark,
  idx.subpipeline,
  idx.n_param = NULL,
  idx.run = NULL,
  match_type = 'bijective'
) {
  sizes <- GetClusterSizes(benchmark, idx.subpipeline, idx.n_param, idx.run, as_list = FALSE)
  matching <- GetLabelClusterMatching(benchmark, idx.subpipeline, idx.n_param, idx.run)
  
  match_type <- match.arg(match_type, choices = c('bijective', 'fixed_cluster', 'fixed_label'))
  if (match_type == 'bijective') {
    matching <- matching$Bijective
  } else if (match_type == 'fixed_cluster') {
    matching <- matching$`Fixed Cluster`
    colnames(matching) <- c('Cluster', 'Population')
  } else if (match_type == 'fixed_label') {
    matching <- matching$`Fixed Label`
  }
  
  lab_u <- levels(GetAnnotation(benchmark, concatenate = TRUE))
  lab_u <- lab_u[!lab_u %in% benchmark$unassigned_labels]
  if (benchmark$stability == 'single' || !is.null(idx.run)) {
    res <- purrr::map(lab_u, function(pop) {
      idx_clusters <- which(purrr::map_lgl(matching$Population, function(x) isTRUE(x == pop)))
      if (length(idx_clusters) == 0) return(0)
      idcs <- match(as.numeric(matching$Cluster[idx_clusters]), sizes$Cluster)
      r <- sizes$Size[idcs]
      names(r) <- idx_clusters
      r
    })
    names(res) <- lab_u
  } else {
    res <- vector(mode = 'list', length = max(matching$Run))
    names(res) <- paste0('Run', seq_along(res))
    for (idx.run in seq_along(res)) {
      m <- matching[matching$Run == idx.run, ]
      res[[idx.run]] <- purrr::map(lab_u, function(pop) {
        idx_clusters <- which(purrr::map_lgl(m$Population, function(x) isTRUE(x == pop)))
        if (length(idx_clusters) == 0) return(0)
        idcs <- match(as.numeric(m$Cluster[idx_clusters]), sizes[[idx.run]]$Cluster)
        r <- sizes[[idx.run]]$Size[idcs]
        names(r) <- idx_clusters
        r
      })
      names(res[[idx.run]]) <- lab_u
    }
  }
  res
}