R/01_BenchmarkSetUp_00_Modules.R

Defines functions GetSubpipelineTags AlignSubpipelines MergeOverlaps print.Subpipeline Subpipeline fMap.ModuleChain fExtract.ModuleChain fTrain.ModuleChain print.ModuleChain Chain fMap.Module fExtract.Module fTrain.Module print.Module Module fMap.WrapperWithParameters fExtract.WrapperWithParameters fTrain.WrapperWithParameters print.WrapperWithParameters Fix print.Clone CloneFrom

Documented in CloneFrom Fix Module Subpipeline

#' Clone projection step from some other subpipeline
#'
#' If the set-up of a projection step is shared among multiple subpipelines, only specify it once and use this cloning function to refer to the first instance.
#' 
#' @param idx_subpipeline integer: index of subpipeline from which to clone the projection result
#'
#' @export
CloneFrom <- function(
  idx_subpipeline
) {
  
  w <- new.env(hash = TRUE)
  w$ref <- idx_subpipeline
  structure(w, class = 'Clone')
}

print.Clone <- function(x, ...) {
  params <- list(...)
  if (!is.null(params$offset)) {
    offset <- strrep('\t', params$offset)
  } else {
    offset <- ''
  }
  .msg(offset)
  if (!is.null(params$subpipelines_list)) {
    print(params$subpipelines_list[[x$ref]]$projection, nobreak = TRUE, no_n_param = TRUE)
    .msg_alt(' [CLONED FROM #', x$ref,']')
  } else {
    .msg_lite('Subpipeline ', x$ref, ' projection step cloned')
  }
  if (!isTRUE(params$nobreak)) {
    .msg('\n')
  }
}

#' Fix parameters for a tool wrapper
#'
#' Takes a projection or clustering tool wrapper and fixes given parameter values for it.
#' 
#' @param tool_name name of a tool for which a wrapper (generated by \code{WrapTool}) exists in the global namespace
#' @param ... named parameter values to fix for training a model using \code{wrapper}
#'
#' @export
Fix <- function(
  tool_name,
  ...
) {
  
  wrapper <- tryCatch(expr = { eval(parse(text = paste0('wrapper.projection.', tool_name), )) }, error = function(e) { NULL })
  if (is.null(wrapper))
    wrapper <- tryCatch(expr = { eval(parse(text = paste0('wrapper.clustering.', tool_name), )) }, error = function(e) { NULL })
  
  params <- list(...)
  .Fix.ValidityChecks(environment())
  
  w <- new.env(hash = TRUE)
  
  w$wrapper <- wrapper
  w$name <- wrapper$name
  w$params <- params
  w$type <- wrapper$type
  structure(w, class = 'WrapperWithParameters')
}

print.WrapperWithParameters <- function(x, ...) {
  .msg('Wrapper for ', x$type, ' tool ')
  .msg_name(x$wrapper$name)
  .msg('\n')
  
  params <- x$wrapper$defaults_to_train
  for (idx_param in seq_along(x$params))
    params[[names(x$params)[idx_param]]] <- x$params[[idx_param]]
  
  if (length(params) > 0) {
    .msg(' with parameters:\n')
    for (idx_param in seq_along(x$params)) {
      .msg_alt('\t', names(x$params[idx_param]), ' = ')
      .msg_val(x$params[[idx_param]], '\n')
    }
  } else {
    .msg('\n')
  }
}

fTrain.WrapperWithParameters <- function(w) {
  function(input, n_param = NULL, n_param_name = NULL, knn = NULL, exprs = NULL) {

    if (!w$wrapper$uses_knn_graph)
      knn <- NULL
    if (!w$wrapper$uses_original_expression_matrix)
      exprs <- NULL
    
    p <- list()
    p[['input']] <- input
    p[['knn']] <- knn
    p[['exprs']] <- exprs
    params <- w$params
    if (!is.null(n_param_name) && !is.null(n_param))
      params[[n_param_name]] <- n_param
    p[['params']] <- params
    
    do.call(w$wrapper$train, p)
  }
}

fExtract.WrapperWithParameters <- function(w) {
  function(model) {
    w$wrapper$extract(model)
  }
}

fMap.WrapperWithParameters <- function(w) {
  function(model, input) {
    w$wrapper$map(model, input)
  }
}

#' Specify *n*-parameter for a tool wrapper
#'
#' Takes a projection or clustering tool wrapper with parameter values and chooses which (if any) of the parameters to turn into an *n*-parameter (non-fixed, to do a parameter sweep over).
#' 
#' @param wrapper_with_parameter tool wrapper with parameters generated by \code{Fix}
#' @param n_param optional string: name of the *n*-parameter
#'
#' @export
Module <- function(
  wrapper_with_parameters,
  n_param = NULL
) {
  
  .Module.ValidityChecks(environment())
  
  m <- new.env(hash = TRUE)
  m$wrapper_with_parameters <- wrapper_with_parameters
  m$name                    <- wrapper_with_parameters$name
  m$type                    <- wrapper_with_parameters$type
  m$n_param                 <- n_param
  m$r_pacakges              <- wrapper_with_parameters$wrapper$r_packages
  m$python_modules          <- wrapper_with_parameters$wrapper$python_modules
  structure(m, class = c('Module', if (m$wrapper_with_parameters$wrapper$type == 'projection') 'ProjectionModule' else if (m$wrapper_with_parameters$wrapper$type == 'clustering') 'ClusteringModule'))
}

print.Module <- function(x, ...) {
  .msg('Module with wrapper for ', x$type, ' tool ')
  .msg_name(x$name)
  if (length(x$wrapper_with_parameters$params) > 0) {
    .msg(' with parameters:\n')
    for (idx_param in seq_along(x$wrapper_with_parameters$params)) {
      .msg_alt('\t', names(x$wrapper_with_parameters$params[idx_param]), ' = ')
      .msg_val(x$wrapper_with_parameters$params[[idx_param]], '\n')
    }
  } else {
    .msg('\n')
  }
  if (!is.null(x$n_param)) {
    .msg_name(x$n_param)
    .msg(' set as n-parameter\n')
  }
}

fTrain.Module <- function(m) {
  function(input, n_param = NULL, knn = NULL, exprs = NULL) {
    f <- fTrain.WrapperWithParameters(m$wrapper_with_parameters)
    f(input, n_param = n_param, n_param_name = m$n_param, knn = knn, exprs = exprs)
  }
}

fExtract.Module <- function(m) {
  function(model) {
    f <- fExtract.WrapperWithParameters(m$wrapper_with_parameters)
    f(model)
  }
}

fMap.Module <- function(m) {
  function(model, input) {
    f <- fMap.WrapperWithParameters(m$wrapper_with_parameters)
    f(model, input)
  }
}

Chain <- function(
  module_list
) {
  
  if ('Module' %in% class(module_list))
    module_list <- list(module_list)
  
  if (class(module_list) == 'Clone')
    return(module_list)
  
  n_modules <- length(module_list)
  which_n_param <- which(purrr::map_lgl(module_list, function(x) !is.null(x$n_param)))
  type <- unique(purrr::map_chr(module_list, function(x) x$type))
  .Chain.ValidityChecks(environment())
  
  m                <- new.env(hash = TRUE)
  m$modules        <- module_list
  m$n_modules      <- n_modules
  m$which_n_param  <- if (length(which_n_param) == 0) NULL else which_n_param
  m$uses_original_expression_matrix <- any(purrr::map_lgl(module_list, function(x) x$wrapper_with_parameters$wrapper$uses_original_expression_matrix))
  m$uses_knn_graph <- any(purrr::map_lgl(module_list, function(x) x$wrapper_with_parameters$wrapper$uses_knn_graph))
  m$type           <- type
  m$name           <- paste0('(', paste0(purrr::map_chr(module_list, function(x) x$name), collapse = '->'), ')')
  m$r_packages     <- unlist(purrr::map(module_list, function(x) x$wrapper_with_parameters$wrapper$r_packages))
  m$python_modules <- unlist(purrr::map(module_list, function(x) x$wrapper_with_parameters$wrapper$python_modules))
  m$uses_python    <- any(purrr::map_lgl(module_list, function(x) x$wrapper_with_parameters$wrapper$uses_python))
  structure(m, class = c('ModuleChain'))
}

print.ModuleChain <- function(x, ...) {
  params <- list(...)
  if (!is.null(params$offset)) {
    offset <- strrep('\t', params$offset)
  } else {
    offset <- ''
  }
  .msg(offset)
  for (idx_module in seq_len(x$n_modules)) {
    .msg_name(x$modules[[idx_module]]$name)
    if (idx_module < x$n_modules)
      .msg(' -> ')
  }
  if (!is.null(x$which_n_param) && !isTRUE(params$no_n_param)) {
    .msg(' (n-parameter: ')
    .msg_alt(x$modules[[x$which_n_param]]$n_param)
    .msg(' from ')
    .msg_alt(x$modules[[x$which_n_param]]$name)
    if (!is.null(params$n_param_value)) {
      .msg(' set to ')
      .msg_alt(params$n_param_value)
    }
    .msg(')')
  }
  if (!isTRUE(params$nobreak)) {
    .msg('\n')
  }
}

fTrain.ModuleChain <- function(m) {
  n_modules <- m$n_modules
  which_n_param <- m$which_n_param
  
  ## To train the final model, we will need to go through the individual chained modules,
  ## train on the output of the previous module and extract the result to pass it on,
  ## until we get to the last module (then we return the model created by that module)
  
  function(input, n_param = NULL, knn = NULL, exprs = NULL, save_intermediates = TRUE, h5_path = NULL, idx.subpipeline = NULL, idx.n_param = NULL, out.intermediates = NULL) {
    
    if (!is.null(out.intermediates)) {
      intermediates <- vector(mode = 'list', length = n_modules - 1)
    }
    
    tmp <- input
    
    for (idx_module in seq_len(n_modules)) {
      this.n_param <- if (!is.null(which_n_param) && which_n_param == idx_module) n_param else NULL
      f <- fTrain.Module(m$modules[[idx_module]])
      tmp <- f(input, n_param = this.n_param, knn = knn, exprs = exprs)
      if (idx_module < n_modules) {
        f <- fExtract.Module(m$modules[[idx_module]])
        tmp <- f(tmp)
        if (save_intermediates) {
          if (!is.null(h5_path)) {
            .h5writeProjectionIntermediate(tmp, h5_path, idx.subpipeline, idx.n_param, idx_module)
          } else if (!is.null(out.intermediates)) {
            intermediates[[idx_module]] <- tmp
          }
        }
          
      }
    }
    if (!is.null(out.intermediates))
      eval.parent(substitute(out.intermediates <- intermediates))
    tmp
  }
}

fExtract.ModuleChain <- function(m) {
  function(model) {
    f <- fExtract.Module(m$modules[[m$n_modules]])
    f(model)
  }
}

fMap.ModuleChain <- function(m) {
  function(model, input) {
    f <- fMap.Module(m$modeuls[[m$n_modules]])
    f(model, input)
  }
}

#' Create subpipeline of a pipeline
#'
#' Creates a single subpipeline, consisting of a projection step, a clustering step or both.
#' 
#' @param projection \code{Module} object for projection (can be composed of a single or multiple tools chained consecutively). Default value is \code{NULL}
#' @param clustering \code{Module} object for clustering (composed of a single clustering tool)
#' 
#' @seealso \code{Module}
#'
#' @export
Subpipeline <- function(
  projection = NULL,
  clustering = NULL
) {
  
  .Subpipeline.ValidityChecks(environment())
  
  s <- new.env(hash = TRUE)
  s$projection <- if (is.null(projection)) NULL else Chain(projection)
  s$clustering <- if (is.null(clustering)) NULL else Chain(clustering)
  
  structure(s, class = 'Subpipeline')
}

print.Subpipeline <- function(x, ...) {
  params <- list(...)
  if (!is.null(params$offset)) {
    n_offset <- params$offset
    offset <- strrep('\t', params$offset)
  } else {
    n_offset <- 0
    offset <- ''
  }
  projection_n_param <- params$projection_n_param
  clustering_n_param <- params$clustering_n_param
  if (isTRUE(params$simple)) {
    if (!is.null(params$bullet_number)) {
      bullet_number <- paste0('(', params$bullet_number, ') ')
    } else {
      bullet_number <- ''
    }
    if (!is.null(x$projection)) {
      .msg(offset, bullet_number)
      print(x$projection, nobreak = TRUE, n_param_value = projection_n_param, subpipelines_list = params$subpipelines_list)
      .msg(' -> ')
      print(x$clustering, n_param_value = clustering_n_param, subpipelines_list = params$subpipelines_list)
    }
  } else {
    .msg(offset)
    .msg('Subpipeline projection step:')
    if (is.null(x$projection)) {
      .msg_val(' none\n')
    } else {
      .msg('\n')
      print(x$projection, offset = n_offset + 1, n_param_value = projection_n_param, subpipelines_list = params$subpipelines_list)
    }
    .msg(offset)
    .msg('Subpipeline clustering step:')
    if (is.null(x$clustering)) {
      .msg_val(' none\n')
    } else {
      .msg('\n')
      print(x$clustering, offset = n_offset + 1, n_param_value = clustering_n_param, subpipelines_list = params$subpipelines_list)
    }
  }
}

MergeOverlaps <- function(l) {
  n <- length(l)
  if (n < 2)
    return(l)
  del <- rep(FALSE, n)
  for (i in 2:n) {
    for (j in 1:(i-1)) {
      if (sum(del[c(i, j)]) == 0 && any(l[[i]] %in% l[[j]])) {
        l[[i]] <- unique(c(l[[i]], l[[j]]))
        l[[j]] <- NA
        del[j] <- TRUE
      }
    }
  }
  l[!del]
}

AlignSubpipelines <- function(
  benchmark,
  subpipelines,
  n_params
) {
  
  .AlignSubpipelines.ValidityChecks(environment())
  
  if (!is.null(subpipelines)) {
    if (!is.list(subpipelines))
      subpipelines <- list(subpipelines)
    benchmark$subpipelines <- subpipelines
    benchmark$n_subpipelines <- length(subpipelines)
    benchmark$clone_groups <- list()
    
    if (is.null(n_params))
      n_params <- rep(list(list(projection = c(), clustering = c())), benchmark$n_subpipelines)
    
    if (!is.null(names(n_params)) && 'projection' %in% names(n_params) || 'clustering' %in% names(n_params))
      n_params <- list(n_params)
    
    for (idx in seq_along(n_params)) {
      if (!'projection' %in% names(n_params[[idx]]) && 'clustering' %in% names(n_params[[idx]])) {
        n_params[[idx]]$projection <- c()#rep(NA, length(n_params[[idx]]$clustering))
        if (IsClone(subpipeline[[idx]]$projection)) {
          n_params[[idx]]$projection <- n_params[[subpipeline[[idx]]$projection$ref]]$projection
          attr(n_params[[idx]]$projection, 'IsClone') <- TRUE
        }
      }
      if (!'clustering' %in% names(n_params[[idx]]) && 'projection' %in% names(n_params[[idx]]))
        n_params[[idx]]$clustering <- c()#rep(NA, length(n_params[[idx]]$projection))
      if (!'clustering' %in% names(n_params[[idx]]) && !'projection' %in% names(n_params[[idx]]))
        n_params[[idx]]$projection <- n_params[[idx]]$clustering <- c()
    }
    
    for (idx in seq_along(subpipelines)) {
      if (!is.null(subpipelines[[idx]]$projection) && !is.null(subpipelines[[idx]]$projection$which_n_param))
        if (is.null(n_params) || is.null(n_params[[idx]]) || is.null(n_params[[idx]]$projection))
          stop(paste0('Missing n-parameter values for projection step of subpipeline ', idx))
      if (!is.null(subpipelines[[idx]]$clustering) && !is.null(subpipelines[[idx]]$clustering$which_n_param))
        if (is.null(n_params) || is.null(n_params[[idx]]) || is.null(n_params[[idx]]$clustering))
          stop(paste0('Missing n-parameter values for clustering step of subpipeline ', idx))
    }
    
    for (idx in seq_along(n_params)) {
      if (isTRUE(n_params[[idx]]$expand)) {
        if (length(n_params[[idx]]$projection) > 0 && length(n_params[[idx]]$clustering) > 0) {
          grid <- expand.grid(n_params[[idx]]$projection, n_params[[idx]]$clustering)
          n_params[[idx]]$projection <- grid[, 1]
          n_params[[idx]]$clustering <- grid[, 2]
        }
      } else {
        lp <- length(n_params[[idx]]$projection)
        lc <- length(n_params[[idx]]$clustering)
        if (lp > 0 && lc > 0 && lp != lc) {
          if (lc > lp) {
            if (lc %% lp == 0) {
              n_params[[idx]]$projection <- rep(n_params[[idx]]$projection, times = lc %/% lp)
            } else {
              stop(paste0('Subpipeline ', idx, 'n-parameter values clustering vector length not a multiple of projection vector length'))
            }
          } else if (lp > lc) {
            if (lp %% lc == 0) {
              n_params[[idx]]$clustering <- rep(n_params[[idx]]$clustering, times = lp %/% lc)
            } else {
              stop(paste0('Subpipeline ', idx, 'n-parameter values projection vector length not a multiple of clustering vector length'))
            }
          }
        }
      }
    }
    
    for (idx in seq_along(subpipelines)) {
      proj <- subpipelines[[idx]]$projection
      idx_clone <- length(benchmark$clone_groups) + 1
      while (IsClone(proj)) {
        if (length(benchmark$clone_groups) < idx_clone)
          benchmark$clone_groups <- c(benchmark$clone_groups, list(c(idx, proj$ref)))
        else
          benchmark$clone_groups[[idx_clone]] <- unique(c(benchmark$clone_groups[[idx_clone]], idx, proj$ref))
        proj <- proj$ref
      }
    }
    benchmark$clone_groups <- MergeOverlaps(benchmark$clone_groups)
    
    
    for (idx in seq_along(subpipelines))
      proj <- subpipelines[[idx]]$projection
      if (IsClone(proj))
        benchmark$clone_groups[[proj$ref]] <- unique(c(benchmark$clone_groups[[proj$ref]], idx, proj$ref))
    
    benchmark$n_params     <- n_params
    
    benchmark$executable <- TRUE
  }
  
  invisible(benchmark)
}

GetSubpipelineTags <- function(subpipeline_list, idx.subpipeline) {
  name_proj <- 
    if (is.null(subpipeline_list[[idx.subpipeline]]$projection)) {
      NULL
    } else if (IsClone(subpipeline_list[[idx.subpipeline]]$projection)) {
      subpipeline_list[[subpipeline_list[[idx.subpipeline]]$projection$ref]]$projection$name
    } else {
      subpipeline_list[[idx.subpipeline]]$projection$name
    }
  name_clus <-
    if (is.null(subpipeline_list[[idx.subpipeline]]$clustering)) {
      NULL
    } else {
      subpipeline_list[[idx.subpipeline]]$clustering$name
    }
  name_subpipeline <-
    if (is.null(name_proj))
      name_clus
    else if (is.null(name_clus))
      name_proj
    else
      paste0(name_proj, ' -> ', name_clus)
  list(projection = name_proj, clustering = name_clus, subpipeline = name_subpipeline)
}
davnovak/SingleBench documentation built on Dec. 19, 2021, 9:10 p.m.